acme: regexp fix (see libregexp change)

This commit is contained in:
Russ Cox 2007-12-07 15:33:38 -05:00
parent a7511dd43d
commit 6f16e7fc1b

View file

@ -521,26 +521,56 @@ classmatch(int classno, int c, int negate)
} }
/* /*
* Note optimization in addinst: * Add inst to the list [l, l+NLIST), but only if it is not there already.
* *l must be pending when addinst called; if *l has been looked * These work lists are stored and processed in increasing
* at already, the optimization is a bug. * order of sep->p[0], so if the inst is there already, the one
* there already is a more left match and takes priority.
* (This works for backward searches too, because there
* is no explicit comparison.)
*/ */
int Ilist*
addinst(Ilist *l, Inst *inst, Rangeset *sep) addinst1(Ilist *l, Inst *inst, Rangeset *sep)
{ {
Ilist *p; Ilist *p;
for(p = l; p->inst; p++){ for(p = l; p->inst; p++)
if(p->inst==inst){ if(p->inst==inst)
if((sep)->r[0].q0 < p->se.r[0].q0) return 0;
p->se= *sep; /* this would be bug */
return 0; /* It's already there */ if(p == l+NLIST)
return l+NLIST;
p->inst = inst;
p->se = *sep;
(p+1)->inst = 0;
return p;
}
int
addinst(Ilist *l, Inst *inst, Rangeset *sep)
{
Ilist *ap;
ap = addinst1(l, inst, sep);
if(ap == 0)
return 0;
if(ap == l+NLIST)
return -1;
/*
* Added inst to list at ap.
* Expand any ORs right now, so that entire
* work list ends up being sorted by increasing sep->p[0].
*/
for(; ap->inst; ap++){
if(ap->inst->type == OR){
if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
return -1;
if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
return -1;
} }
} }
p->inst = inst; return 0;
p->se= *sep;
(p+1)->inst = nil;
return 1;
} }
int int
@ -557,7 +587,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
Inst *inst; Inst *inst;
Ilist *tlp; Ilist *tlp;
uint p; uint p;
int nnl, ntl;
int nc, c; int nc, c;
int wrapped; int wrapped;
int startchar; int startchar;
@ -566,7 +595,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
p = startp; p = startp;
startchar = 0; startchar = 0;
wrapped = 0; wrapped = 0;
nnl = 0;
if(startinst->type<OPERATOR) if(startinst->type<OPERATOR)
startchar = startinst->type; startchar = startinst->type;
list[0][0].inst = list[1][0].inst = nil; list[0][0].inst = list[1][0].inst = nil;
@ -576,6 +604,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
else else
nc = runestrlen(r); nc = runestrlen(r);
/* Execute machine once for each character */ /* Execute machine once for each character */
nl = list[0];
for(;;p++){ for(;;p++){
doloop: doloop:
if(p>=eof || p>=nc){ if(p>=eof || p>=nc){
@ -594,7 +623,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
} }
c = 0; c = 0;
}else{ }else{
if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0) if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
break; break;
if(t != nil) if(t != nil)
c = textreadc(t, p); c = textreadc(t, p);
@ -602,18 +631,15 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
c = r[p]; c = r[p];
} }
/* fast check for first char */ /* fast check for first char */
if(startchar && nnl==0 && c!=startchar) if(startchar && nl->inst==0 && c!=startchar)
continue; continue;
tl = list[flag]; tl = list[flag];
nl = list[flag^=1]; nl = list[flag^=1];
nl->inst = nil; nl->inst = nil;
ntl = nnl;
nnl = 0;
if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){ if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
/* Add first instruction to this list */ /* Add first instruction to this list */
sempty.r[0].q0 = p; sempty.r[0].q0 = p;
if(addinst(tl, startinst, &sempty)) if(addinst(tl, startinst, &sempty) < 0){
if(++ntl >= NLIST){
Overflow: Overflow:
warning(nil, "regexp list overflow\n"); warning(nil, "regexp list overflow\n");
sel.r[0].q0 = -1; sel.r[0].q0 = -1;
@ -627,8 +653,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
default: /* regular character */ default: /* regular character */
if(inst->type==c){ if(inst->type==c){
Addinst: Addinst:
if(addinst(nl, inst->u1.next, &tlp->se)) if(addinst(nl, inst->u1.next, &tlp->se) < 0)
if(++nnl >= NLIST)
goto Overflow; goto Overflow;
} }
break; break;
@ -666,13 +691,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
goto Addinst; goto Addinst;
break; break;
case OR: case OR:
/* evaluate right choice later */ /* already expanded; just a placeholder */
if(addinst(tl, inst->u.right, &tlp->se)) break;
if(++ntl >= NLIST)
goto Overflow;
/* efficiency: advance and re-evaluate */
inst = inst->u1.left;
goto Switchstmt;
case END: /* Match! */ case END: /* Match! */
tlp->se.r[0].q1 = p; tlp->se.r[0].q1 = p;
newmatch(&tlp->se); newmatch(&tlp->se);
@ -700,13 +720,11 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
Inst *inst; Inst *inst;
Ilist *tlp; Ilist *tlp;
int p; int p;
int nnl, ntl;
int c; int c;
int wrapped; int wrapped;
int startchar; int startchar;
flag = 0; flag = 0;
nnl = 0;
wrapped = 0; wrapped = 0;
p = startp; p = startp;
startchar = 0; startchar = 0;
@ -715,6 +733,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
list[0][0].inst = list[1][0].inst = nil; list[0][0].inst = list[1][0].inst = nil;
sel.r[0].q0= -1; sel.r[0].q0= -1;
/* Execute machine once for each character, including terminal NUL */ /* Execute machine once for each character, including terminal NUL */
nl = list[0];
for(;;--p){ for(;;--p){
doloop: doloop:
if(p <= 0){ if(p <= 0){
@ -734,24 +753,20 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
} }
c = 0; c = 0;
}else{ }else{
if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0) if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
break; break;
c = textreadc(t, p-1); c = textreadc(t, p-1);
} }
/* fast check for first char */ /* fast check for first char */
if(startchar && nnl==0 && c!=startchar) if(startchar && nl->inst==0 && c!=startchar)
continue; continue;
tl = list[flag]; tl = list[flag];
nl = list[flag^=1]; nl = list[flag^=1];
nl->inst = nil; nl->inst = nil;
ntl = nnl;
nnl = 0;
if(sel.r[0].q0<0 && (!wrapped || p>startp)){ if(sel.r[0].q0<0 && (!wrapped || p>startp)){
/* Add first instruction to this list */ /* Add first instruction to this list */
/* the minus is so the optimizations in addinst work */ sempty.r[0].q0 = p;
sempty.r[0].q0 = -p; if(addinst(tl, bstartinst, &sempty) < 0){
if(addinst(tl, bstartinst, &sempty))
if(++ntl >= NLIST){
Overflow: Overflow:
warning(nil, "regexp list overflow\n"); warning(nil, "regexp list overflow\n");
sel.r[0].q0 = -1; sel.r[0].q0 = -1;
@ -765,8 +780,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
default: /* regular character */ default: /* regular character */
if(inst->type == c){ if(inst->type == c){
Addinst: Addinst:
if(addinst(nl, inst->u1.next, &tlp->se)) if(addinst(nl, inst->u1.next, &tlp->se) < 0)
if(++nnl >= NLIST)
goto Overflow; goto Overflow;
} }
break; break;
@ -804,15 +818,9 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
goto Addinst; goto Addinst;
break; break;
case OR: case OR:
/* evaluate right choice later */ /* already expanded; just a placeholder */
if(addinst(tl, inst->u.right, &tlp->se)) break;
if(++ntl >= NLIST)
goto Overflow;
/* efficiency: advance and re-evaluate */
inst = inst->u1.left;
goto Switchstmt;
case END: /* Match! */ case END: /* Match! */
tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
tlp->se.r[0].q1 = p; tlp->se.r[0].q1 = p;
bnewmatch(&tlp->se); bnewmatch(&tlp->se);
break; break;