acme: regexp fix (see libregexp change)
This commit is contained in:
parent
a7511dd43d
commit
6f16e7fc1b
1 changed file with 59 additions and 51 deletions
|
|
@ -521,26 +521,56 @@ classmatch(int classno, int c, int negate)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note optimization in addinst:
|
* Add inst to the list [l, l+NLIST), but only if it is not there already.
|
||||||
* *l must be pending when addinst called; if *l has been looked
|
* These work lists are stored and processed in increasing
|
||||||
* at already, the optimization is a bug.
|
* order of sep->r[0].q0, so if the inst is there already, the one
|
||||||
|
* there already is a more left match and takes priority.
|
||||||
|
* (This works for backward searches too, because there
|
||||||
|
* is no explicit comparison.)
|
||||||
*/
|
*/
|
||||||
int
|
Ilist*
|
||||||
addinst(Ilist *l, Inst *inst, Rangeset *sep)
|
addinst1(Ilist *l, Inst *inst, Rangeset *sep)
|
||||||
{
|
{
|
||||||
Ilist *p;
|
Ilist *p;
|
||||||
|
|
||||||
for(p = l; p->inst; p++){
|
for(p = l; p->inst; p++)
|
||||||
if(p->inst==inst){
|
if(p->inst==inst)
|
||||||
if((sep)->r[0].q0 < p->se.r[0].q0)
|
return 0;
|
||||||
p->se= *sep; /* this would be bug */
|
|
||||||
return 0; /* It's already there */
|
if(p == l+NLIST)
|
||||||
|
return l+NLIST;
|
||||||
|
|
||||||
|
p->inst = inst;
|
||||||
|
p->se = *sep;
|
||||||
|
(p+1)->inst = 0;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
addinst(Ilist *l, Inst *inst, Rangeset *sep)
|
||||||
|
{
|
||||||
|
Ilist *ap;
|
||||||
|
|
||||||
|
ap = addinst1(l, inst, sep);
|
||||||
|
if(ap == 0)
|
||||||
|
return 0;
|
||||||
|
if(ap == l+NLIST)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Added inst to list at ap.
|
||||||
|
* Expand any ORs right now, so that entire
|
||||||
|
* work list ends up being sorted by increasing sep->r[0].q0.
|
||||||
|
*/
|
||||||
|
for(; ap->inst; ap++){
|
||||||
|
if(ap->inst->type == OR){
|
||||||
|
if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
|
||||||
|
return -1;
|
||||||
|
if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p->inst = inst;
|
return 0;
|
||||||
p->se= *sep;
|
|
||||||
(p+1)->inst = nil;
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
@ -557,7 +587,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
Inst *inst;
|
Inst *inst;
|
||||||
Ilist *tlp;
|
Ilist *tlp;
|
||||||
uint p;
|
uint p;
|
||||||
int nnl, ntl;
|
|
||||||
int nc, c;
|
int nc, c;
|
||||||
int wrapped;
|
int wrapped;
|
||||||
int startchar;
|
int startchar;
|
||||||
|
|
@ -566,7 +595,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
p = startp;
|
p = startp;
|
||||||
startchar = 0;
|
startchar = 0;
|
||||||
wrapped = 0;
|
wrapped = 0;
|
||||||
nnl = 0;
|
|
||||||
if(startinst->type<OPERATOR)
|
if(startinst->type<OPERATOR)
|
||||||
startchar = startinst->type;
|
startchar = startinst->type;
|
||||||
list[0][0].inst = list[1][0].inst = nil;
|
list[0][0].inst = list[1][0].inst = nil;
|
||||||
|
|
@ -576,6 +604,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
else
|
else
|
||||||
nc = runestrlen(r);
|
nc = runestrlen(r);
|
||||||
/* Execute machine once for each character */
|
/* Execute machine once for each character */
|
||||||
|
nl = list[0];
|
||||||
for(;;p++){
|
for(;;p++){
|
||||||
doloop:
|
doloop:
|
||||||
if(p>=eof || p>=nc){
|
if(p>=eof || p>=nc){
|
||||||
|
|
@ -594,7 +623,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
}
|
}
|
||||||
c = 0;
|
c = 0;
|
||||||
}else{
|
}else{
|
||||||
if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
|
if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
|
||||||
break;
|
break;
|
||||||
if(t != nil)
|
if(t != nil)
|
||||||
c = textreadc(t, p);
|
c = textreadc(t, p);
|
||||||
|
|
@ -602,18 +631,15 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
c = r[p];
|
c = r[p];
|
||||||
}
|
}
|
||||||
/* fast check for first char */
|
/* fast check for first char */
|
||||||
if(startchar && nnl==0 && c!=startchar)
|
if(startchar && nl->inst==0 && c!=startchar)
|
||||||
continue;
|
continue;
|
||||||
tl = list[flag];
|
tl = list[flag];
|
||||||
nl = list[flag^=1];
|
nl = list[flag^=1];
|
||||||
nl->inst = nil;
|
nl->inst = nil;
|
||||||
ntl = nnl;
|
|
||||||
nnl = 0;
|
|
||||||
if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
|
if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
|
||||||
/* Add first instruction to this list */
|
/* Add first instruction to this list */
|
||||||
sempty.r[0].q0 = p;
|
sempty.r[0].q0 = p;
|
||||||
if(addinst(tl, startinst, &sempty))
|
if(addinst(tl, startinst, &sempty) < 0){
|
||||||
if(++ntl >= NLIST){
|
|
||||||
Overflow:
|
Overflow:
|
||||||
warning(nil, "regexp list overflow\n");
|
warning(nil, "regexp list overflow\n");
|
||||||
sel.r[0].q0 = -1;
|
sel.r[0].q0 = -1;
|
||||||
|
|
@ -627,8 +653,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
default: /* regular character */
|
default: /* regular character */
|
||||||
if(inst->type==c){
|
if(inst->type==c){
|
||||||
Addinst:
|
Addinst:
|
||||||
if(addinst(nl, inst->u1.next, &tlp->se))
|
if(addinst(nl, inst->u1.next, &tlp->se) < 0)
|
||||||
if(++nnl >= NLIST)
|
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -666,13 +691,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
||||||
goto Addinst;
|
goto Addinst;
|
||||||
break;
|
break;
|
||||||
case OR:
|
case OR:
|
||||||
/* evaluate right choice later */
|
/* already expanded; just a placeholder */
|
||||||
if(addinst(tl, inst->u.right, &tlp->se))
|
break;
|
||||||
if(++ntl >= NLIST)
|
|
||||||
goto Overflow;
|
|
||||||
/* efficiency: advance and re-evaluate */
|
|
||||||
inst = inst->u1.left;
|
|
||||||
goto Switchstmt;
|
|
||||||
case END: /* Match! */
|
case END: /* Match! */
|
||||||
tlp->se.r[0].q1 = p;
|
tlp->se.r[0].q1 = p;
|
||||||
newmatch(&tlp->se);
|
newmatch(&tlp->se);
|
||||||
|
|
@ -700,13 +720,11 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
||||||
Inst *inst;
|
Inst *inst;
|
||||||
Ilist *tlp;
|
Ilist *tlp;
|
||||||
int p;
|
int p;
|
||||||
int nnl, ntl;
|
|
||||||
int c;
|
int c;
|
||||||
int wrapped;
|
int wrapped;
|
||||||
int startchar;
|
int startchar;
|
||||||
|
|
||||||
flag = 0;
|
flag = 0;
|
||||||
nnl = 0;
|
|
||||||
wrapped = 0;
|
wrapped = 0;
|
||||||
p = startp;
|
p = startp;
|
||||||
startchar = 0;
|
startchar = 0;
|
||||||
|
|
@ -715,6 +733,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
||||||
list[0][0].inst = list[1][0].inst = nil;
|
list[0][0].inst = list[1][0].inst = nil;
|
||||||
sel.r[0].q0= -1;
|
sel.r[0].q0= -1;
|
||||||
/* Execute machine once for each character, including terminal NUL */
|
/* Execute machine once for each character, including terminal NUL */
|
||||||
|
nl = list[0];
|
||||||
for(;;--p){
|
for(;;--p){
|
||||||
doloop:
|
doloop:
|
||||||
if(p <= 0){
|
if(p <= 0){
|
||||||
|
|
@ -734,24 +753,20 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
||||||
}
|
}
|
||||||
c = 0;
|
c = 0;
|
||||||
}else{
|
}else{
|
||||||
if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
|
if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
|
||||||
break;
|
break;
|
||||||
c = textreadc(t, p-1);
|
c = textreadc(t, p-1);
|
||||||
}
|
}
|
||||||
/* fast check for first char */
|
/* fast check for first char */
|
||||||
if(startchar && nnl==0 && c!=startchar)
|
if(startchar && nl->inst==0 && c!=startchar)
|
||||||
continue;
|
continue;
|
||||||
tl = list[flag];
|
tl = list[flag];
|
||||||
nl = list[flag^=1];
|
nl = list[flag^=1];
|
||||||
nl->inst = nil;
|
nl->inst = nil;
|
||||||
ntl = nnl;
|
|
||||||
nnl = 0;
|
|
||||||
if(sel.r[0].q0<0 && (!wrapped || p>startp)){
|
if(sel.r[0].q0<0 && (!wrapped || p>startp)){
|
||||||
/* Add first instruction to this list */
|
/* Add first instruction to this list */
|
||||||
/* the minus is so the optimizations in addinst work */
|
sempty.r[0].q0 = p;
|
||||||
sempty.r[0].q0 = -p;
|
if(addinst(tl, bstartinst, &sempty) < 0){
|
||||||
if(addinst(tl, bstartinst, &sempty))
|
|
||||||
if(++ntl >= NLIST){
|
|
||||||
Overflow:
|
Overflow:
|
||||||
warning(nil, "regexp list overflow\n");
|
warning(nil, "regexp list overflow\n");
|
||||||
sel.r[0].q0 = -1;
|
sel.r[0].q0 = -1;
|
||||||
|
|
@ -765,8 +780,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
||||||
default: /* regular character */
|
default: /* regular character */
|
||||||
if(inst->type == c){
|
if(inst->type == c){
|
||||||
Addinst:
|
Addinst:
|
||||||
if(addinst(nl, inst->u1.next, &tlp->se))
|
if(addinst(nl, inst->u1.next, &tlp->se) < 0)
|
||||||
if(++nnl >= NLIST)
|
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -804,15 +818,9 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
||||||
goto Addinst;
|
goto Addinst;
|
||||||
break;
|
break;
|
||||||
case OR:
|
case OR:
|
||||||
/* evaluate right choice later */
|
/* already expanded; just a placeholder */
|
||||||
if(addinst(tl, inst->u.right, &tlp->se))
|
break;
|
||||||
if(++ntl >= NLIST)
|
|
||||||
goto Overflow;
|
|
||||||
/* efficiency: advance and re-evaluate */
|
|
||||||
inst = inst->u1.left;
|
|
||||||
goto Switchstmt;
|
|
||||||
case END: /* Match! */
|
case END: /* Match! */
|
||||||
tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
|
|
||||||
tlp->se.r[0].q1 = p;
|
tlp->se.r[0].q1 = p;
|
||||||
bnewmatch(&tlp->se);
|
bnewmatch(&tlp->se);
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue