libregexp: revert regexp fix

This commit is contained in:
Russ Cox 2007-12-07 17:33:41 -05:00
parent 2deda14e42
commit 6d08a0f548
5 changed files with 113 additions and 117 deletions

View file

@ -23,89 +23,90 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
} }
/* /*
* Add ip to the list [lp, elp], but only if it is not there already. * Note optimization in _renewthread:
* These work lists are stored and processed in increasing * *lp must be pending when _renewthread called; if *l has been looked
* order of sp[0], so if the ip is there already, the one that's * at already, the optimization is a bug.
* there already is a more left match and takes priority.
*/ */
static Relist* extern Relist*
_renewthread1(Relist *lp, /* Relist to add to */ _renewthread(Relist *lp, /* _relist to add to */
Relist *elp, /* limit pointer for Relist */
Reinst *ip, /* instruction to add */ Reinst *ip, /* instruction to add */
int ms, int ms,
Resublist *sep) /* pointers to subexpressions */ Resublist *sep) /* pointers to subexpressions */
{ {
Relist *p; Relist *p;
for(p=lp; p->inst; p++) for(p=lp; p->inst; p++){
if(p->inst == ip) if(p->inst == ip){
if(sep->m[0].s.sp < p->se.m[0].s.sp){
if(ms > 1)
p->se = *sep;
else
p->se.m[0] = sep->m[0];
}
return 0; return 0;
}
if(p == elp) /* refuse to overflow buffer */ }
return elp;
p->inst = ip; p->inst = ip;
if(ms > 1) if(ms > 1)
p->se = *sep; p->se = *sep;
else else
p->se.m[0] = sep->m[0]; p->se.m[0] = sep->m[0];
(p+1)->inst = 0; (++p)->inst = 0;
return p; return p;
} }
extern int
_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
{
Relist *ap;
ap = _renewthread1(lp, elp, ip, ms, sep);
if(ap == 0)
return 0;
if(ap == elp)
return -1;
/*
* Added ip to list at ap.
* Expand any ORs right now, so that entire
* work list ends up being sorted by increasing m[0].sp.
*/
for(; ap->inst; ap++){
if(ap->inst->type == OR){
if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
return -1;
if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
return -1;
}
}
return 0;
}
/* /*
* same as renewthread, but called with * same as renewthread, but called with
* initial empty start pointer. * initial empty start pointer.
*/ */
extern int extern Relist*
_renewemptythread(Relist *lp, /* _relist to add to */ _renewemptythread(Relist *lp, /* _relist to add to */
Relist *elp,
Reinst *ip, /* instruction to add */ Reinst *ip, /* instruction to add */
int ms, int ms,
char *sp) /* pointers to subexpressions */ char *sp) /* pointers to subexpressions */
{ {
Resublist sep; Relist *p;
for(p=lp; p->inst; p++){
if(p->inst == ip){
if(sp < p->se.m[0].s.sp) {
if(ms > 1)
memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.sp = sp;
}
return 0;
}
}
p->inst = ip;
if(ms > 1) if(ms > 1)
memset(&sep, 0, sizeof sep); memset(&p->se, 0, sizeof(p->se));
sep.m[0].s.sp = sp; p->se.m[0].s.sp = sp;
sep.m[0].e.ep = 0; (++p)->inst = 0;
return _renewthread(lp, elp, ip, ms, &sep); return p;
} }
extern int extern Relist*
_rrenewemptythread(Relist *lp, /* _relist to add to */ _rrenewemptythread(Relist *lp, /* _relist to add to */
Relist *elp,
Reinst *ip, /* instruction to add */ Reinst *ip, /* instruction to add */
int ms, int ms,
Rune *rsp) /* pointers to subexpressions */ Rune *rsp) /* pointers to subexpressions */
{ {
return _renewemptythread(lp, elp, ip, ms, (char*)rsp); Relist *p;
for(p=lp; p->inst; p++){
if(p->inst == ip){
if(rsp < p->se.m[0].s.rsp) {
if(ms > 1)
memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.rsp = rsp;
}
return 0;
}
}
p->inst = ip;
if(ms > 1)
memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.rsp = rsp;
(++p)->inst = 0;
return p;
} }

View file

@ -232,7 +232,7 @@ optimize(Reprog *pp)
int size; int size;
Reprog *npp; Reprog *npp;
Reclass *cl; Reclass *cl;
int diff, proglen; int diff;
/* /*
* get rid of NOOP chains * get rid of NOOP chains
@ -249,13 +249,10 @@ optimize(Reprog *pp)
* necessary. Reallocate to the actual space used * necessary. Reallocate to the actual space used
* and then relocate the code. * and then relocate the code.
*/ */
proglen = freep - pp->firstinst; size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
size = sizeof(Reprog) + proglen*sizeof(Reinst);
npp = realloc(pp, size); npp = realloc(pp, size);
if(npp==0 || npp==pp){ if(npp==0 || npp==pp)
pp->proglen = proglen;
return pp; return pp;
}
diff = (char *)npp - (char *)pp; diff = (char *)npp - (char *)pp;
freep = (Reinst *)((char *)freep + diff); freep = (Reinst *)((char *)freep + diff);
for(inst=npp->firstinst; inst<freep; inst++){ for(inst=npp->firstinst; inst<freep; inst++){
@ -276,7 +273,6 @@ optimize(Reprog *pp)
*(char**)(void*)&inst->u2.left += diff; *(char**)(void*)&inst->u2.left += diff;
} }
*(char**)(void*)&npp->startinst += diff; *(char**)(void*)&npp->startinst += diff;
npp->proglen = proglen;
return npp; return npp;
} }

View file

@ -68,7 +68,7 @@ struct Reljunk
Rune* reol; Rune* reol;
}; };
extern int _renewthread(Relist*, Relist*, Reinst*, int, Resublist*); extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
extern void _renewmatch(Resub*, int, Resublist*); extern void _renewmatch(Resub*, int, Resublist*);
extern int _renewemptythread(Relist*, Relist*, Reinst*, int, char*); extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
extern int _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*); extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);

View file

@ -2,6 +2,7 @@
#include "regexp9.h" #include "regexp9.h"
#include "regcomp.h" #include "regcomp.h"
/* /*
* return 0 if no match * return 0 if no match
* >0 if a match * >0 if a match
@ -12,14 +13,16 @@ regexec1(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */ char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */ Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */ int ms, /* number of elements at mp */
Reljunk *j) Reljunk *j
)
{ {
int flag=0; int flag=0;
Reinst *inst; Reinst *inst;
Relist *tlp; Relist *tlp;
char *s; char *s;
int i, checkstart, n; int i, checkstart;
Rune r, *rp, *ep; Rune r, *rp, *ep;
int n;
Relist* tl; /* This list, next list */ Relist* tl; /* This list, next list */
Relist* nl; Relist* nl;
Relist* tle; /* ends of this and next list */ Relist* tle; /* ends of this and next list */
@ -45,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
switch(j->starttype) { switch(j->starttype) {
case RUNE: case RUNE:
p = utfrune(s, j->startchar); p = utfrune(s, j->startchar);
if(p == 0 || (j->eol && p >= j->eol)) if(p == 0 || s == j->eol)
return match; return match;
s = p; s = p;
break; break;
@ -53,7 +56,7 @@ regexec1(Reprog *progp, /* program to run */
if(s == bol) if(s == bol)
break; break;
p = utfrune(s, '\n'); p = utfrune(s, '\n');
if(p == 0 || (j->eol && p+1 >= j->eol)) if(p == 0 || s == j->eol)
return match; return match;
s = p+1; s = p+1;
break; break;
@ -74,16 +77,17 @@ regexec1(Reprog *progp, /* program to run */
/* Add first instruction to current list */ /* Add first instruction to current list */
if(match == 0) if(match == 0)
_renewemptythread(tl, tle, progp->startinst, ms, s); _renewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */ /* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
for(inst = tlp->inst; ; inst = inst->u2.next){ for(inst = tlp->inst; ; inst = inst->u2.next){
switch(inst->type){ switch(inst->type){
case RUNE: /* regular character */ case RUNE: /* regular character */
if(inst->u1.r == r) if(inst->u1.r == r){
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
}
break; break;
case LBRA: case LBRA:
tlp->se.m[inst->u1.subid].s.sp = s; tlp->se.m[inst->u1.subid].s.sp = s;
@ -93,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
continue; continue;
case ANY: case ANY:
if(r != '\n') if(r != '\n')
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case ANYNL: case ANYNL:
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case BOL: case BOL:
@ -112,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end; ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){ if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
} }
@ -123,12 +127,15 @@ regexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1]) if(r >= rp[0] && r <= rp[1])
break; break;
if(rp == ep) if(rp == ep)
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case OR: case OR:
/* expanded during renewthread; just a place holder */ /* evaluate right choice later */
break; if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */ case END: /* Match! */
match = 1; match = 1;
tlp->se.m[0].e.ep = s; tlp->se.m[0].e.ep = s;
@ -158,18 +165,19 @@ regexec2(Reprog *progp, /* program to run */
int rv; int rv;
Relist *relist0, *relist1; Relist *relist0, *relist1;
relist0 = malloc((progp->proglen+1)*sizeof(Relist)); /* mark space */
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist0 == nil) if(relist0 == nil)
return -1; return -1;
relist1 = malloc((progp->proglen+1)*sizeof(Relist)); relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist1 == nil){ if(relist1 == nil){
free(relist1); free(relist1);
return -1; return -1;
} }
j->relist[0] = relist0; j->relist[0] = relist0;
j->relist[1] = relist1; j->relist[1] = relist1;
j->reliste[0] = relist0 + progp->proglen; j->reliste[0] = relist0 + BIGLISTSIZE - 2;
j->reliste[1] = relist1 + progp->proglen; j->reliste[1] = relist1 + BIGLISTSIZE - 2;
rv = regexec1(progp, bol, mp, ms, j); rv = regexec1(progp, bol, mp, ms, j);
free(relist0); free(relist0);
@ -210,8 +218,8 @@ regexec(Reprog *progp, /* program to run */
/* mark space */ /* mark space */
j.relist[0] = relist0; j.relist[0] = relist0;
j.relist[1] = relist1; j.relist[1] = relist1;
j.reliste[0] = relist0 + nelem(relist0) - 1; j.reliste[0] = relist0 + nelem(relist0) - 2;
j.reliste[1] = relist1 + nelem(relist1) - 1; j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = regexec1(progp, bol, mp, ms, &j); rv = regexec1(progp, bol, mp, ms, &j);
if(rv >= 0) if(rv >= 0)

View file

@ -9,9 +9,9 @@
*/ */
static int static int
rregexec1(Reprog *progp, /* program to run */ rregexec1(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */ Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */ Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */ int ms, /* number of elements at mp */
Reljunk *j) Reljunk *j)
{ {
int flag=0; int flag=0;
@ -28,7 +28,7 @@ rregexec1(Reprog *progp, /* program to run */
Rune *p; Rune *p;
match = 0; match = 0;
checkstart = j->starttype; checkstart = j->startchar;
if(mp) if(mp)
for(i=0; i<ms; i++) { for(i=0; i<ms; i++) {
mp[i].s.rsp = 0; mp[i].s.rsp = 0;
@ -46,7 +46,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(j->starttype) { switch(j->starttype) {
case RUNE: case RUNE:
p = runestrchr(s, j->startchar); p = runestrchr(s, j->startchar);
if(p == 0 || (j->reol && p >= j->reol)) if(p == 0 || p == j->reol)
return match; return match;
s = p; s = p;
break; break;
@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
if(s == bol) if(s == bol)
break; break;
p = runestrchr(s, '\n'); p = runestrchr(s, '\n');
if(p == 0 || (j->reol && p+1 >= j->reol)) if(p == 0 || s == j->reol)
return match; return match;
s = p+1; s = p+1;
break; break;
@ -71,16 +71,15 @@ rregexec1(Reprog *progp, /* program to run */
nl->inst = 0; nl->inst = 0;
/* Add first instruction to current list */ /* Add first instruction to current list */
if(match == 0) _rrenewemptythread(tl, progp->startinst, ms, s);
_rrenewemptythread(tl, tle, progp->startinst, ms, s);
/* Execute machine until current list is empty */ /* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ for(tlp=tl; tlp->inst; tlp++){
for(inst = tlp->inst; ; inst = inst->u2.next){ for(inst=tlp->inst; ; inst = inst->u2.next){
switch(inst->type){ switch(inst->type){
case RUNE: /* regular character */ case RUNE: /* regular character */
if(inst->u1.r == r) if(inst->u1.r == r)
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case LBRA: case LBRA:
@ -91,11 +90,11 @@ rregexec1(Reprog *progp, /* program to run */
continue; continue;
case ANY: case ANY:
if(r != '\n') if(r != '\n')
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case ANYNL: case ANYNL:
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case BOL: case BOL:
@ -110,7 +109,7 @@ rregexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end; ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){ if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
} }
@ -121,12 +120,15 @@ rregexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1]) if(r >= rp[0] && r <= rp[1])
break; break;
if(rp == ep) if(rp == ep)
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case OR: case OR:
/* expanded during renewthread; just a place holder */ /* evaluate right choice later */
break; if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
case END: /* Match! */ case END: /* Match! */
match = 1; match = 1;
tlp->se.m[0].e.rep = s; tlp->se.m[0].e.rep = s;
@ -139,7 +141,7 @@ rregexec1(Reprog *progp, /* program to run */
} }
if(s == j->reol) if(s == j->reol)
break; break;
checkstart = j->starttype && nl->inst==0; checkstart = j->startchar && nl->inst==0;
s++; s++;
}while(r); }while(r);
return match; return match;
@ -153,26 +155,15 @@ rregexec2(Reprog *progp, /* program to run */
Reljunk *j Reljunk *j
) )
{ {
int rv; Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
Relist *relist0, *relist1;
relist0 = malloc((progp->proglen+1)*sizeof(Relist)); /* mark space */
if(relist0 == nil)
return -1;
relist1 = malloc((progp->proglen+1)*sizeof(Relist));
if(relist1 == nil){
free(relist1);
return -1;
}
j->relist[0] = relist0; j->relist[0] = relist0;
j->relist[1] = relist1; j->relist[1] = relist1;
j->reliste[0] = relist0 + progp->proglen; j->reliste[0] = relist0 + nelem(relist0) - 2;
j->reliste[1] = relist1 + progp->proglen; j->reliste[1] = relist1 + nelem(relist1) - 2;
rv = rregexec1(progp, bol, mp, ms, j); return rregexec1(progp, bol, mp, ms, j);
free(relist0);
free(relist1);
return rv;
} }
extern int extern int
@ -208,8 +199,8 @@ rregexec(Reprog *progp, /* program to run */
/* mark space */ /* mark space */
j.relist[0] = relist0; j.relist[0] = relist0;
j.relist[1] = relist1; j.relist[1] = relist1;
j.reliste[0] = relist0 + nelem(relist0) - 1; j.reliste[0] = relist0 + nelem(relist0) - 2;
j.reliste[1] = relist1 + nelem(relist1) - 1; j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = rregexec1(progp, bol, mp, ms, &j); rv = rregexec1(progp, bol, mp, ms, &j);
if(rv >= 0) if(rv >= 0)