Index: awk.h =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/awk.h,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 awk.h --- awk.h 13 Dec 2002 04:59:48 -0000 1.1.1.2 +++ awk.h 20 Aug 2004 14:40:31 -0000 @@ -22,6 +22,8 @@ THIS SOFTWARE. ****************************************************************/ +#include <regex.h> + typedef double Awkfloat; /* unsigned char is more trouble than it's worth */ @@ -201,32 +203,12 @@ /* structures used by regular expression matching machinery, mostly b.c: */ -#define NCHARS (256+1) /* 256 handles 8-bit chars; 128 does 7-bit */ - /* watch out in match(), etc. */ -#define NSTATES 32 - -typedef struct rrow { - long ltype; /* long avoids pointer warnings on 64-bit */ - union { - int i; - Node *np; - uschar *up; - } lval; /* because Al stores a pointer in it! */ - int *lfollow; -} rrow; - typedef struct fa { - uschar gototab[NSTATES][NCHARS]; - uschar out[NSTATES]; - uschar *restr; - int *posns[NSTATES]; - int anchor; - int use; - int initstat; - int curstat; - int accept; - int reset; - struct rrow re[1]; /* variable: actual size set by calling malloc */ + uschar *restr; /* regex text; with cflags, the cache key in makedfa() */ + int use; /* makedfa() use counter at last use; lowest is evicted */ + int cflags; /* flags passed to regcomp() */ + int eflags; /* flags passed to regexec(); callers toggle REG_NOTBOL */ + regex_t re; /* the compiled regex; release with regfree() */ } fa; Index: b.c =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/b.c,v retrieving revision 1.1.1.7 diff -u -r1.1.1.7 b.c --- b.c 8 Feb 2004 21:32:14 -0000 1.1.1.7 +++ b.c 20 Aug 2004 14:40:37 -0000 @@ -22,10 +22,6 @@ THIS SOFTWARE. ****************************************************************/ -/* lasciate ogne speranza, voi ch'entrate. 
*/ - -#define DEBUG - #include #include #include @@ -33,71 +29,32 @@ #include "awk.h" #include "ytab.h" -#define HAT (NCHARS+2) /* matches ^ in regular expr */ - /* NCHARS is 2**n */ -#define MAXLIN 22 - -#define type(v) (v)->nobj /* badly overloaded here */ -#define info(v) (v)->ntype /* badly overloaded here */ -#define left(v) (v)->narg[0] -#define right(v) (v)->narg[1] -#define parent(v) (v)->nnext - -#define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL: -#define UNARY case STAR: case PLUS: case QUEST: - -/* encoding in tree Nodes: - leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL): - left is index, right contains value or pointer to value - unary (STAR, PLUS, QUEST): left is child, right is null - binary (CAT, OR): left and right are children - parent contains pointer to parent -*/ - - -int *setvec; -int *tmpset; -int maxsetvec = 0; - -int rtok; /* next token in current re */ -int rlxval; -static uschar *rlxstr; -static uschar *prestr; /* current position in current re */ -static uschar *lastre; /* origin of last re */ - -static int setcnt; -static int poscnt; - -char *patbeg; -int patlen; - -#define NFA 20 /* cache this many dynamic fa's */ -fa *fatab[NFA]; -int nfatab = 0; /* entries in fatab */ +char *patbeg; +int patlen; + +#define NFA 20 /* cache this many dynamic fa's */ +static fa *fatab[NFA]; +static int nfatab = 0; /* entries in fatab */ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ { - int i, use, nuse; + int i, use, ncflags, nuse; fa *pfa; static int now = 1; - if (setvec == 0) { /* first time through any RE */ - maxsetvec = MAXLIN; - setvec = (int *) malloc(maxsetvec * sizeof(int)); - tmpset = (int *) malloc(maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space initializing makedfa"); - } + ncflags = REG_EXTENDED; + if (!anchor) + ncflags |= REG_NOSUB; if (compile_time) /* a constant for sure */ - return mkdfa(s, anchor); + return mkdfa(s, ncflags); for (i = 0; i < nfatab; i++) 
/* is it there already? */ - if (fatab[i]->anchor == anchor + if (fatab[i]->cflags == ncflags && strcmp((const char *) fatab[i]->restr, s) == 0) { fatab[i]->use = now++; return fatab[i]; } - pfa = mkdfa(s, anchor); + pfa = mkdfa(s, ncflags); if (nfatab < NFA) { /* room for another */ fatab[nfatab] = pfa; fatab[nfatab]->use = now++; @@ -111,130 +68,69 @@ use = fatab[i]->use; nuse = i; } - freefa(fatab[nuse]); + free(fatab[nuse]->restr); + regfree(&fatab[nuse]->re); + free(fatab[nuse]); fatab[nuse] = pfa; pfa->use = now++; return pfa; } -fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ -{ - Node *p, *p1; - fa *f; - - p = reparse(s); - p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); - /* put ALL STAR in front of reg. exp. */ - p1 = op2(CAT, p1, op2(FINAL, NIL, NIL)); - /* put FINAL after reg. exp. */ - - poscnt = 0; - penter(p1); /* enter parent pointers and leaf indices */ - if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) - overflo("out of space for fa"); - f->accept = poscnt-1; /* penter has computed number of positions in re */ - cfoll(f, p1); /* set up follow sets */ - freetr(p1); - if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) - overflo("out of space in makedfa"); - if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) - overflo("out of space in makedfa"); - *f->posns[1] = 0; - f->initstat = makeinit(f, anchor); - f->anchor = anchor; - f->restr = (uschar *) tostring(s); - return f; -} - -int makeinit(fa *f, int anchor) +static void badre(fa *pfa, int errcode) { - int i, k; - - f->curstat = 2; - f->out[2] = 0; - f->reset = 0; - k = *(f->re[0].lfollow); - xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in makeinit"); - for (i=0; i <= k; i++) { - (f->posns[2])[i] = (f->re[0].lfollow)[i]; - } - if ((f->posns[2])[1] == f->accept) - f->out[2] = 1; - for (i=0; i < 
NCHARS; i++) - f->gototab[2][i] = 0; - f->curstat = cgoto(f, 2, HAT); - if (anchor) { - *f->posns[2] = k-1; /* leave out position 0 */ - for (i=0; i < k; i++) { - (f->posns[0])[i] = (f->posns[2])[i]; - } + char msg[256]; - f->out[0] = f->out[2]; - if (f->curstat != 2) - --(*f->posns[f->curstat]); - } - return f->curstat; + regerror(errcode, &pfa->re, msg, sizeof(msg)); + FATAL("bad regular expression: %s", msg); } -void penter(Node *p) /* set up parent pointers and leaf indices */ +fa *mkdfa(const char *s, int cflags) /* compiles awk regex s with regcomp(); cflags are stored and passed on */ { - switch (type(p)) { - LEAF - info(p) = poscnt; - poscnt++; - break; - UNARY - penter(left(p)); - parent(left(p)) = p; - break; - case CAT: - case OR: - penter(left(p)); - penter(right(p)); - parent(left(p)) = p; - parent(right(p)) = p; - break; - default: /* can't happen */ - FATAL("can't happen: unknown type %d in penter", type(p)); - break; - } -} + const char *re = s; /* s is consumed by the loop below; keep the start for restr */ + char *esbase, *esp, c; + fa *pfa; + int ret; -void freetr(Node *p) /* free parse tree */ -{ - switch (type(p)) { - LEAF - xfree(p); - break; - UNARY - freetr(left(p)); - xfree(p); - break; - case CAT: - case OR: - freetr(left(p)); - freetr(right(p)); - xfree(p); - break; - default: /* can't happen */ - FATAL("can't happen: unknown type %d in freetr", type(p)); - break; - } + if ((esbase = malloc(2 * strlen(s) + 1)) == NULL) /* +1: never request 0 bytes */ + FATAL("out of memory"); + esp = esbase; + while ((c = *s++) != '\0') { + if (c == '\\' && *s != '\0') { /* a trailing backslash is left for regcomp() to reject */ + c = quoted(&s); + /* + * Metacharacters like . have to be seen literally; + * \056 is not a metacharacter, so we must escape it. 
+ */ + if (c != '\0' && strchr("()|*+?.^$\\[]", c) != NULL) /* strchr() would also "find" NUL */ + *esp++ = '\\'; + } + *esp++ = c; + } + + if ((pfa = malloc(sizeof(*pfa))) == NULL) + FATAL("out of memory"); + pfa->restr = (uschar *) tostring(re); /* original text: makedfa() compares it with strcmp() */ + pfa->use = 0; + pfa->eflags = 0; + pfa->cflags = cflags; + pfa->re.re_endp = esp; /* REG_PEND: pattern ends here, so it may hold NUL bytes */ + ret = regcomp(&pfa->re, esbase, cflags | REG_PEND); + free(esbase); + if (ret != 0) + badre(pfa, ret); + return (pfa); } -/* in the parsing of regular expressions, metacharacters like . have */ -/* to be seen literally; \056 is not a metacharacter. */ -int hexstr(char **pp) /* find and eval hex string at pp, return new p */ -{ /* only pick up one 8-bit byte (2 chars) */ - uschar *p; +int hexstr(const char **pp) /* find and eval hex string at pp */ + /* and return new p */ + /* only pick up one 8-bit byte (2 chars) */ +{ + const uschar *p; int n = 0; int i; - for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { + for (i = 0, p = (const uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { if (isdigit(*p)) n = 16 * n + *p - '0'; else if (*p >= 'a' && *p <= 'f') @@ -248,10 +144,10 @@ #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ -int quoted(char **pp) /* pick up next thing after a \\ */ - /* and increment *pp */ +int quoted(const char **pp) /* pick up next thing after a \\ */ + /* and increment *pp */ { - char *p = *pp; + const char *p = *pp; int c; if ((c = *p++) == 't') @@ -264,6 +160,10 @@ c = '\r'; else if (c == 'b') c = '\b'; + else if (c == 'a') + c = '\a'; + else if (c == 'v') + c = '\v'; else if (c == '\\') c = '\\'; else if (c == 'x') { /* hexadecimal goo follows */ @@ -282,654 +182,55 @@ return c; } -char *cclenter(const char *argp) /* add a character class */ -{ - int i, c, c2; - uschar *p = (uschar *) argp; - uschar *op, *bp; - static uschar *buf = 0; - static int bufsz = 100; - - op = p; - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) - FATAL("out of space for character class [%.10s...] 
1", p); - bp = buf; - for (i = 0; (c = *p++) != 0; ) { - if (c == '\\') { - c = quoted((char **) &p); - } else if (c == '-' && i > 0 && bp[-1] != 0) { - if (*p != 0) { - c = bp[-1]; - c2 = *p++; - if (c2 == '\\') - c2 = quoted((char **) &p); - if (c > c2) { /* empty; ignore */ - bp--; - i--; - continue; - } - while (c < c2) { - if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) - FATAL("out of space for character class [%.10s...] 2", p); - *bp++ = ++c; - i++; - } - continue; - } - } - if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0)) - FATAL("out of space for character class [%.10s...] 3", p); - *bp++ = c; - i++; - } - *bp = 0; - dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); - xfree(op); - return (char *) tostring((char *) buf); -} - -void overflo(const char *s) -{ - FATAL("regular expression too big: %.30s...", s); -} - -void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ -{ - int i; - int *p; - - switch (type(v)) { - LEAF - f->re[info(v)].ltype = type(v); - f->re[info(v)].lval.np = right(v); - while (f->accept >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in cfoll()"); - } - for (i = 0; i <= f->accept; i++) - setvec[i] = 0; - setcnt = 0; - follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space building follow set"); - f->re[info(v)].lfollow = p; - *p = setcnt; - for (i = f->accept; i >= 0; i--) - if (setvec[i] == 1) - *++p = i; - break; - UNARY - cfoll(f,left(v)); - break; - case CAT: - case OR: - cfoll(f,left(v)); - cfoll(f,right(v)); - break; - default: /* can't happen */ - FATAL("can't happen: unknown type %d in cfoll", type(v)); - } -} - -int first(Node *p) /* collects initially active leaves of p into setvec */ - /* returns 1 if p matches empty string */ -{ - int b, lp; - - switch (type(p)) { - LEAF - lp = info(p); /* look for high-water mark of subscripts */ - while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in first()"); - } - if (setvec[lp] != 1) { - setvec[lp] = 1; - setcnt++; - } - if (type(p) == CCL && (*(char *) right(p)) == '\0') - return(0); /* empty CCL */ - else return(1); - case PLUS: - if (first(left(p)) == 0) return(0); - return(1); - case STAR: - case QUEST: - first(left(p)); - return(0); - case CAT: - if (first(left(p)) == 0 && first(right(p)) == 0) return(0); - return(1); - case OR: - b = first(right(p)); - if (first(left(p)) == 0 || b == 0) return(0); - return(1); - } - FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ - return(-1); -} - -void follow(Node *v) /* collects leaves that can follow v into setvec */ -{ - Node *p; - - if (type(v) == FINAL) - return; - p = parent(v); - switch (type(p)) { - case STAR: - case PLUS: - first(v); - follow(p); - return; - - case OR: - case QUEST: - follow(p); - return; - - case CAT: - if (v == left(p)) { /* v is left child of p */ - if (first(right(p)) == 0) { - follow(p); - return; - } - } else /* v is right child */ - follow(p); - return; - } -} - -int member(int c, const char *sarg) /* is c in s? */ -{ - uschar *s = (uschar *) sarg; - - while (*s) - if (c == *s++) - return(1); - return(0); -} - -int match(fa *f, const char *p0) /* shortest match ? */ +int match(fa *f, const char *p) /* shortest match ? */ { - int s, ns; - uschar *p = (uschar *) p0; + int errcode; - s = f->reset ? 
makeinit(f,0) : f->initstat; - if (f->out[s]) - return(1); - do { - if ((ns = f->gototab[s][*p]) != 0) - s = ns; - else - s = cgoto(f, s, *p); - if (f->out[s]) - return(1); - } while (*p++ != 0); - return(0); + if ((errcode = regexec(&f->re, p, 0, NULL, f->eflags)) != 0 && + errcode != REG_NOMATCH) + badre(f, errcode); + return (errcode != REG_NOMATCH); } -int pmatch(fa *f, const char *p0) /* longest match, for sub */ +int pmatch(fa *f, const char *p) /* longest match, for sub */ { - int s, ns; - uschar *p = (uschar *) p0; - uschar *q; - int i, k; - - /* s = f->reset ? makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } - patbeg = (char *) p; - patlen = -1; - do { - q = p; - do { - if (f->out[s]) /* final state */ - patlen = q-p; - if ((ns = f->gototab[s][*q]) != 0) - s = ns; - else - s = cgoto(f, s, *q); - if (s == 1) { /* no transition */ - if (patlen >= 0) { - patbeg = (char *) p; - return(1); - } - else - goto nextin; /* no match */ - } - } while (*q++ != 0); - if (f->out[s]) - patlen = q-p-1; /* don't count $ */ - if (patlen >= 0) { - patbeg = (char *) p; - return(1); - } - nextin: - s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in pmatch"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - } while (*p++ != 0); - return (0); -} + regmatch_t m; + int errcode; -int nematch(fa *f, const char *p0) /* non-empty match, for sub */ -{ - int s, ns; - uschar *p = (uschar *) p0; - uschar *q; - int i, k; - - /* s = f->reset ? 
makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } - patlen = -1; - while (*p) { - q = p; - do { - if (f->out[s]) /* final state */ - patlen = q-p; - if ((ns = f->gototab[s][*q]) != 0) - s = ns; - else - s = cgoto(f, s, *q); - if (s == 1) { /* no transition */ - if (patlen > 0) { - patbeg = (char *) p; - return(1); - } else - goto nnextin; /* no nonempty match */ - } - } while (*q++ != 0); - if (f->out[s]) - patlen = q-p-1; /* don't count $ */ - if (patlen > 0 ) { - patbeg = (char *) p; - return(1); - } - nnextin: - s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of state space"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - p++; + if ((errcode = regexec(&f->re, p, 1, &m, f->eflags)) == 0) { + patbeg = (char *)&p[m.rm_so]; + patlen = m.rm_eo - m.rm_so; + return (1); + } else if (errcode == REG_NOMATCH) { + patlen = -1; + return (0); } + badre(f, errcode); + /* not reached */ return (0); } -Node *reparse(const char *p) /* parses regular expression pointed to by p */ -{ /* uses relex() to scan regular expression */ - Node *np; - - dprintf( ("reparse <%s>\n", p) ); - lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ - rtok = relex(); - /* GNU compatibility: an empty regexp matches anything */ - if (rtok == '\0') - /* FATAL("empty regular expression"); previous */ - return(op2(ALL, NIL, NIL)); - np = regexp(); - if (rtok != '\0') - FATAL("syntax error in regular expression %s at %s", lastre, prestr); - return(np); -} - -Node *regexp(void) /* top-level parse of reg expr */ -{ - return (alt(concat(primary()))); -} - -Node *primary(void) -{ - Node *np; - - switch (rtok) { - case CHAR: - np = op2(CHAR, NIL, 
itonp(rlxval)); - rtok = relex(); - return (unary(np)); - case ALL: - rtok = relex(); - return (unary(op2(ALL, NIL, NIL))); - case DOT: - rtok = relex(); - return (unary(op2(DOT, NIL, NIL))); - case CCL: - np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); - rtok = relex(); - return (unary(np)); - case NCCL: - np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); - rtok = relex(); - return (unary(np)); - case '^': - rtok = relex(); - return (unary(op2(CHAR, NIL, itonp(HAT)))); - case '$': - rtok = relex(); - return (unary(op2(CHAR, NIL, NIL))); - case '(': - rtok = relex(); - if (rtok == ')') { /* special pleading for () */ - rtok = relex(); - return unary(op2(CCL, NIL, (Node *) tostring(""))); - } - np = regexp(); - if (rtok == ')') { - rtok = relex(); - return (unary(np)); - } - else - FATAL("syntax error in regular expression %s at %s", lastre, prestr); - default: - FATAL("illegal primary in regular expression %s at %s", lastre, prestr); - } - return 0; /*NOTREACHED*/ -} - -Node *concat(Node *np) -{ - switch (rtok) { - case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(': - return (concat(op2(CAT, np, primary()))); - } - return (np); -} - -Node *alt(Node *np) -{ - if (rtok == OR) { - rtok = relex(); - return (alt(op2(OR, np, concat(primary())))); - } - return (np); -} - -Node *unary(Node *np) -{ - switch (rtok) { - case STAR: - rtok = relex(); - return (unary(op2(STAR, np, NIL))); - case PLUS: - rtok = relex(); - return (unary(op2(PLUS, np, NIL))); - case QUEST: - rtok = relex(); - return (unary(op2(QUEST, np, NIL))); - default: - return (np); - } -} - -/* - * Character class definitions conformant to the POSIX locale as - * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source - * and operating character sets are both ASCII (ISO646) or supersets - * thereof. 
- * - * Note that to avoid overflowing the temporary buffer used in - * relex(), the expanded character class (prior to range expansion) - * must be less than twice the size of their full name. - */ - -/* Because isblank doesn't show up in any of the header files on any - * system i use, it's defined here. if some other locale has a richer - * definition of "blank", define HAS_ISBLANK and provide your own - * version. - * the parentheses here are an attempt to find a path through the maze - * of macro definition and/or function and/or version provided. thanks - * to nelson beebe for the suggestion; let's see if it works everywhere. - */ - -#ifndef HAS_ISBLANK - -int (isblank)(int c) -{ - return c==' ' || c=='\t'; -} - -#endif - -struct charclass { - const char *cc_name; - int cc_namelen; - int (*cc_func)(int); -} charclasses[] = { - { "alnum", 5, isalnum }, - { "alpha", 5, isalpha }, - { "blank", 5, isblank }, - { "cntrl", 5, iscntrl }, - { "digit", 5, isdigit }, - { "graph", 5, isgraph }, - { "lower", 5, islower }, - { "print", 5, isprint }, - { "punct", 5, ispunct }, - { "space", 5, isspace }, - { "upper", 5, isupper }, - { "xdigit", 6, isxdigit }, - { NULL, 0, NULL }, -}; - - -int relex(void) /* lexical analyzer for reparse */ -{ - int c, n; - int cflag; - static uschar *buf = 0; - static int bufsz = 100; - uschar *bp; - struct charclass *cc; - int i; - - switch (c = *prestr++) { - case '|': return OR; - case '*': return STAR; - case '+': return PLUS; - case '?': return QUEST; - case '.': return DOT; - case '\0': prestr--; return '\0'; - case '^': - case '$': - case '(': - case ')': - return c; - case '\\': - rlxval = quoted((char **) &prestr); - return CHAR; - default: - rlxval = c; - return CHAR; - case '[': - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) - FATAL("out of space in reg expr %.10s..", lastre); - bp = buf; - if (*prestr == '^') { - cflag = 1; - prestr++; - } - else - cflag = 0; - n = 2 * strlen((const char *) prestr)+1; - if 
(!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0)) - FATAL("out of space for reg expr %.10s...", lastre); - for (; ; ) { - if ((c = *prestr++) == '\\') { - *bp++ = '\\'; - if ((c = *prestr++) == '\0') - FATAL("nonterminated character class %.20s...", lastre); - *bp++ = c; - /* } else if (c == '\n') { */ - /* FATAL("newline in character class %.20s...", lastre); */ - } else if (c == '[' && *prestr == ':') { - /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */ - for (cc = charclasses; cc->cc_name; cc++) - if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0) - break; - if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' && - prestr[2 + cc->cc_namelen] == ']') { - prestr += cc->cc_namelen + 3; - for (i = 0; i < NCHARS; i++) { - if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0)) - FATAL("out of space for reg expr %.10s...", lastre); - if (cc->cc_func(i)) { - *bp++ = i; - n++; - } - } - } else - *bp++ = c; - } else if (c == '\0') { - FATAL("nonterminated character class %.20s", lastre); - } else if (bp == buf) { /* 1st char is special */ - *bp++ = c; - } else if (c == ']') { - *bp++ = 0; - rlxstr = (uschar *) tostring((char *) buf); - if (cflag == 0) - return CCL; - else - return NCCL; - } else - *bp++ = c; - } - } -} - -int cgoto(fa *f, int s, int c) -{ - int i, j, k; - int *p, *q; - - while (f->accept >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in cgoto()"); - } - for (i = 0; i <= f->accept; i++) - setvec[i] = 0; - setcnt = 0; - /* compute positions of gototab[s,c] into setvec */ - p = f->posns[s]; - for (i = 1; i <= *p; i++) { - if ((k = f->re[p[i]].ltype) != FINAL) { - if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np)) - || (k == DOT && c != 0 && c != HAT) - || (k == ALL && c != 0) - || (k == CCL && member(c, (char *) f->re[p[i]].lval.up)) - || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) { - q = f->re[p[i]].lfollow; - for (j = 1; j <= *q; j++) { - if (q[j] >= maxsetvec) { - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("cgoto overflow"); - } - if (setvec[q[j]] == 0) { - setcnt++; - setvec[q[j]] = 1; - } - } - } - } - } - /* determine if setvec is a previous state */ - tmpset[0] = setcnt; - j = 1; - for (i = f->accept; i >= 0; i--) - if (setvec[i]) { - tmpset[j++] = i; - } - /* tmpset == previous state? 
*/ - for (i = 1; i <= f->curstat; i++) { - p = f->posns[i]; - if ((k = tmpset[0]) != p[0]) - goto different; - for (j = 1; j <= k; j++) - if (tmpset[j] != p[j]) - goto different; - /* setvec is state i */ - f->gototab[s][c] = i; - return i; - different:; - } - - /* add tmpset to current set of states */ - if (f->curstat >= NSTATES-1) { - f->curstat = 2; - f->reset = 1; - for (i = 2; i < NSTATES; i++) - xfree(f->posns[i]); - } else - ++(f->curstat); - for (i = 0; i < NCHARS; i++) - f->gototab[f->curstat][i] = 0; - xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space in cgoto"); - - f->posns[f->curstat] = p; - f->gototab[s][c] = f->curstat; - for (i = 0; i <= setcnt; i++) - p[i] = tmpset[i]; - if (setvec[f->accept]) - f->out[f->curstat] = 1; - else - f->out[f->curstat] = 0; - return f->curstat; -} - - -void freefa(fa *f) /* free a finite automaton */ +int nematch(fa *f, const char *p) /* non-empty match, for sub; sets patbeg/patlen, returns 1 if found */ { - int i; + regmatch_t m; + int errcode, eflags = f->eflags; /* local copy: retries add REG_NOTBOL without touching f */ - if (f == NULL) - return; - for (i = 0; i <= f->curstat; i++) - xfree(f->posns[i]); - for (i = 0; i <= f->accept; i++) { - xfree(f->re[i].lfollow); - if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) - xfree((f->re[i].lval.np)); + for (; (errcode = regexec(&f->re, p, 1, &m, eflags)) == 0 && + m.rm_so == m.rm_eo; eflags |= REG_NOTBOL) { /* empty match: retry one char on, no longer at line start */ + if (*p == '\0' || *++p == '\0') { /* never step past the terminator */ + errcode = REG_NOMATCH; + break; + } + } + if (errcode == 0) { + patbeg = (char *)&p[m.rm_so]; + patlen = m.rm_eo - m.rm_so; + return (1); + } else if (errcode == REG_NOMATCH) { + patlen = -1; + return (0); } - xfree(f->restr); - xfree(f); + badre(f, errcode); + /* not reached */ + return (0); } Index: lib.c =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/lib.c,v retrieving revision 1.1.1.3 diff -u -r1.1.1.3 lib.c --- lib.c 17 Mar 2003 07:59:58 -0000 1.1.1.3 +++ lib.c 20 Aug 2004 14:40:37 -0000 @@ -418,7 +418,6 @@ return 0; pfa = makedfa(fs, 1); 
dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); - tempstat = pfa->initstat; for (i = 1; ; i++) { if (i > nfields) growfldtab(i); @@ -428,7 +427,7 @@ fldtab[i]->sval = fr; dprintf( ("refldbld: i=%d\n", i) ); if (nematch(pfa, rec)) { - pfa->initstat = 2; /* horrible coupling to b.c */ + pfa->eflags |= REG_NOTBOL; dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); strncpy(fr, rec, patbeg-rec); fr += patbeg - rec + 1; @@ -437,7 +436,7 @@ } else { dprintf( ("no match %s\n", rec) ); strcpy(fr, rec); - pfa->initstat = tempstat; + pfa->eflags &= ~REG_NOTBOL; break; } } Index: main.c =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/main.c,v retrieving revision 1.1.1.9 diff -u -r1.1.1.9 main.c --- main.c 8 Feb 2004 21:32:16 -0000 1.1.1.9 +++ main.c 20 Aug 2004 14:40:37 -0000 @@ -22,7 +22,7 @@ THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20040207"; +const char *version = "version 20040207-FreeBSD"; #define DEBUG #include @@ -55,7 +55,7 @@ { const char *fs = NULL; - setlocale(LC_CTYPE, ""); + setlocale(LC_ALL, ""); setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ cmdname = argv[0]; if (argc == 1) { Index: proto.h =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/proto.h,v retrieving revision 1.1.1.2 diff -u -r1.1.1.2 proto.h --- proto.h 13 Dec 2002 04:59:47 -0000 1.1.1.2 +++ proto.h 20 Aug 2004 14:40:37 -0000 @@ -40,29 +40,11 @@ extern fa *makedfa(const char *, int); extern fa *mkdfa(const char *, int); -extern int makeinit(fa *, int); -extern void penter(Node *); -extern void freetr(Node *); -extern int hexstr(char **); -extern int quoted(char **); -extern char *cclenter(const char *); -extern void overflo(const char *); -extern void cfoll(fa *, Node *); -extern int first(Node *); -extern void follow(Node *); -extern int member(int, const char *); 
+extern int hexstr(const char **); +extern int quoted(const char **); extern int match(fa *, const char *); extern int pmatch(fa *, const char *); extern int nematch(fa *, const char *); -extern Node *reparse(const char *); -extern Node *regexp(void); -extern Node *primary(void); -extern Node *concat(Node *); -extern Node *alt(Node *); -extern Node *unary(Node *); -extern int relex(void); -extern int cgoto(fa *, int, int); -extern void freefa(fa *); extern int pgetc(void); extern char *cursource(void); Index: run.c =================================================================== RCS file: /home/ncvs/src/contrib/one-true-awk/run.c,v retrieving revision 1.1.1.7 diff -u -r1.1.1.7 run.c --- run.c 8 Feb 2004 21:32:21 -0000 1.1.1.7 +++ run.c 20 Aug 2004 14:40:43 -0000 @@ -1236,8 +1236,7 @@ pfa = makedfa(fs, 1); } if (nematch(pfa,s)) { - tempstat = pfa->initstat; - pfa->initstat = 2; + pfa->eflags |= REG_NOTBOL; do { n++; sprintf(num, "%d", n); @@ -1253,7 +1252,7 @@ n++; sprintf(num, "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); - pfa->initstat = tempstat; + pfa->eflags &= ~REG_NOTBOL; goto spdone; } } while (nematch(pfa,s)); @@ -1817,8 +1816,7 @@ } y = execute(a[2]); /* replacement string */ if (pmatch(pfa, t)) { - tempstat = pfa->initstat; - pfa->initstat = 2; + pfa->eflags |= REG_NOTBOL; pb = buf; rptr = getsval(y); do { @@ -1882,7 +1880,7 @@ FATAL("gsub result2 %.30s too big; can't happen", buf); *pb = '\0'; setsval(x, buf); /* BUG: should be able to avoid copy + free */ - pfa->initstat = tempstat; + pfa->eflags &= ~REG_NOTBOL; } tempfree(x); tempfree(y);