#include #include #include #define MAX_ENDS 10 #define S_SPLIT 256 #define S_MATCH 257 /* Operators, ordered by ascending precedence */ typedef enum ops { OP_START, /* Sentinel, implicit */ OP_END, /* Implicit */ OP_CHAR, OP_RPAR, OP_LPAR, OP_OR, OP_CONCAT, /* Implicit */ OP_STAR, } optype_t; struct state { int c; struct state *next; struct state *next2; }; struct expr { struct state *start; struct state **ptrs[MAX_ENDS]; int nptrs; }; int tokval; struct expr exprstack[10]; struct expr *exprp = exprstack; optype_t opstack[10]; optype_t *opp = opstack; int need_concat; static struct state * new_state(int c, struct state *next, struct state *next2) { struct state *s; s = malloc(sizeof(struct state)); s->c = c; s->next = next; s->next2 = next2; return (s); } void patch(struct expr *e, struct state *s) { int i; for (i = 0; i < e->nptrs; i++) *e->ptrs[i] = s; } optype_t next_token(char **s) { char c; c = *(*s)++; switch (c) { case '\0': return (OP_END); case '(': return (OP_LPAR); case ')': return (OP_RPAR); case '*': return (OP_STAR); case '|': return (OP_OR); case '\\': tokval = *(*s)++; return (OP_CHAR); default: tokval = c; return (OP_CHAR); } } void push_expr(struct state *start, struct state **ptr) { struct expr *e; e = exprp++; e->start = start; *e->ptrs = ptr; e->nptrs = 1; } void push_expr_arr(struct state *start, struct state **ptrs[], int n) { struct expr *e; e = exprp++; e->start = start; e->nptrs = n; memcpy(e->ptrs, ptrs, n * sizeof(struct state *)); } struct expr * pop_expr() { return (--exprp); } void push_op(optype_t op) { *opp++ = op; } optype_t pop_op() { *opp = OP_START; return (*--opp); } void evalopsuntil(optype_t op) { struct state *new; struct expr *e1, *e2; optype_t cur; int i; while (*(opp-1) >= op) { cur = pop_op(); if (cur == OP_STAR) { e1 = pop_expr(); new = new_state(S_SPLIT, e1->start, NULL); patch(e1, new); push_expr(new, &new->next2); } else if (cur == OP_CONCAT) { e2 = pop_expr(); e1 = pop_expr(); patch(e1, e2->start); push_expr_arr(e1->start, e2->ptrs, e2->nptrs); } else if (cur == OP_OR) { e2 = pop_expr(); e1 = pop_expr(); new = new_state(S_SPLIT, e1->start, e2->start); /* XXX overflow */ for (i = 0; i < e2->nptrs; i++) e1->ptrs[e1->nptrs + i] = e2->ptrs[i]; push_expr_arr(new, e1->ptrs, e1->nptrs + e2->nptrs); } else if (cur == OP_START) break; } } void operator(optype_t op) { if (*(opp - 1) >= op) { evalopsuntil(op); if (op != OP_RPAR) push_op(op); } else push_op(op); if (op == OP_RPAR && need_concat) operator(OP_CONCAT); if (op == OP_STAR || op == OP_RPAR) need_concat = 1; else need_concat = 0; } struct state * compile(char *s) { struct state *new, *start; struct expr *e; int op; need_concat = 0; start = NULL; opp = opstack; exprp = exprstack; push_op(OP_START); while (1) { op = next_token(&s); if (op == OP_END) break; if (op == OP_CHAR) { if (need_concat) operator(OP_CONCAT); new = new_state(tokval, NULL, NULL); push_expr(new, &new->next); need_concat = 1; } else operator(op); } evalopsuntil(OP_START); e = pop_expr(); new = new_state(S_MATCH, NULL, NULL); patch(e, new); return (e->start); } int main(void) { compile("a(b|c)*d"); compile("abcd"); compile("a(b|c)d"); compile("a(bb)*a"); compile("a(bc)d"); compile("ab*cd"); compile("a(bc)*d"); compile("a(b|c)d"); compile("a(bc|de)fg"); return (0); }