Update libregexp with recent bug fixes from Plan 9.

This commit is contained in:
rsc 2004-03-05 05:13:56 +00:00
parent 4da83e7cce
commit 62390091b7
12 changed files with 111 additions and 81 deletions

View file

@ -1,6 +1,2 @@
#include <fmt.h> #include <u.h>
#include <setjmp.h> #include <libc.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

6
src/libregexp/lib9.h.std Normal file
View file

@ -0,0 +1,6 @@
#include <fmt.h>
#include <setjmp.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

View file

@ -9,7 +9,6 @@ OFILES=\
regexec.$O\ regexec.$O\
regsub.$O\ regsub.$O\
regaux.$O\ regaux.$O\
rregaux.$O\
rregexec.$O\ rregexec.$O\
rregsub.$O\ rregsub.$O\

View file

@ -30,19 +30,27 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
extern Relist* extern Relist*
_renewthread(Relist *lp, /* _relist to add to */ _renewthread(Relist *lp, /* _relist to add to */
Reinst *ip, /* instruction to add */ Reinst *ip, /* instruction to add */
int ms,
Resublist *sep) /* pointers to subexpressions */ Resublist *sep) /* pointers to subexpressions */
{ {
Relist *p; Relist *p;
for(p=lp; p->inst; p++){ for(p=lp; p->inst; p++){
if(p->inst == ip){ if(p->inst == ip){
if((sep)->m[0].s.sp < p->se.m[0].s.sp) if(sep->m[0].s.sp < p->se.m[0].s.sp){
p->se = *sep; if(ms > 1)
p->se = *sep;
else
p->se.m[0] = sep->m[0];
}
return 0; return 0;
} }
} }
p->inst = ip; p->inst = ip;
p->se = *sep; if(ms > 1)
p->se = *sep;
else
p->se.m[0] = sep->m[0];
(++p)->inst = 0; (++p)->inst = 0;
return p; return p;
} }
@ -54,6 +62,7 @@ _renewthread(Relist *lp, /* _relist to add to */
extern Relist* extern Relist*
_renewemptythread(Relist *lp, /* _relist to add to */ _renewemptythread(Relist *lp, /* _relist to add to */
Reinst *ip, /* instruction to add */ Reinst *ip, /* instruction to add */
int ms,
char *sp) /* pointers to subexpressions */ char *sp) /* pointers to subexpressions */
{ {
Relist *p; Relist *p;
@ -61,16 +70,43 @@ _renewemptythread(Relist *lp, /* _relist to add to */
for(p=lp; p->inst; p++){ for(p=lp; p->inst; p++){
if(p->inst == ip){ if(p->inst == ip){
if(sp < p->se.m[0].s.sp) { if(sp < p->se.m[0].s.sp) {
memset((void *)&p->se, 0, sizeof(p->se)); if(ms > 1)
memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.sp = sp; p->se.m[0].s.sp = sp;
} }
return 0; return 0;
} }
} }
p->inst = ip; p->inst = ip;
memset((void *)&p->se, 0, sizeof(p->se)); if(ms > 1)
memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.sp = sp; p->se.m[0].s.sp = sp;
(++p)->inst = 0; (++p)->inst = 0;
return p; return p;
} }
/*
 * Rune-pointer variant of adding an "empty" thread (one with no
 * subexpression matches yet) for instruction ip to the list lp.
 *
 * If lp already holds a thread for ip, the entry is kept and only
 * refreshed when rsp is lower than the start pointer it recorded;
 * 0 is returned in that case.  Otherwise a new entry is written over
 * the list terminator, a fresh terminator is placed after it, and a
 * pointer to that new terminator is returned.
 *
 * The whole subexpression record is cleared only when ms > 1;
 * otherwise just the start pointer of m[0] is written.
 */
extern Relist*
_rrenewemptythread(Relist *lp,	/* _relist to add to */
	Reinst *ip,		/* instruction to add */
	int ms,			/* number of match slots requested */
	Rune *rsp)		/* start pointer to record in m[0] */
{
	Relist *t;

	t = lp;
	while(t->inst != 0){
		if(t->inst != ip){
			t++;
			continue;
		}
		/* already have a thread for ip: refresh only if rsp is lower */
		if(rsp < t->se.m[0].s.rsp){
			if(ms > 1)
				memset(&t->se, 0, sizeof(t->se));
			t->se.m[0].s.rsp = rsp;
		}
		return 0;
	}

	/* t is the terminator slot: reuse it for the new thread */
	t->inst = ip;
	if(ms > 1)
		memset(&t->se, 0, sizeof(t->se));
	t->se.m[0].s.rsp = rsp;

	/* re-terminate the list one slot further on */
	t++;
	t->inst = 0;
	return t;
}

View file

@ -15,8 +15,6 @@ struct Node
Reinst* last; Reinst* last;
}Node; }Node;
Reprog RePrOg;
#define NSTACK 20 #define NSTACK 20
static Node andstack[NSTACK]; static Node andstack[NSTACK];
static Node *andp; static Node *andp;
@ -252,7 +250,7 @@ optimize(Reprog *pp)
* and then relocate the code. * and then relocate the code.
*/ */
size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
npp = (Reprog *)realloc(pp, size); npp = realloc(pp, size);
if(npp==0 || npp==pp) if(npp==0 || npp==pp)
return pp; return pp;
diff = (char *)npp - (char *)pp; diff = (char *)npp - (char *)pp;
@ -303,12 +301,12 @@ dump(Reprog *pp)
print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
if(l->type == RUNE) if(l->type == RUNE)
print("\t%C\n", l->r); print("\t%C\n", l->u1.r);
else if(l->type == CCLASS || l->type == NCCLASS){ else if(l->type == CCLASS || l->type == NCCLASS){
print("\t["); print("\t[");
if(l->type == NCCLASS) if(l->type == NCCLASS)
print("^"); print("^");
for(p = l->cp->spans; p < l->cp->end; p += 2) for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
if(p[0] == p[1]) if(p[0] == p[1])
print("%C", p[0]); print("%C", p[0]);
else else
@ -477,7 +475,7 @@ regcomp1(char *s, int literal, int dot_type)
Reprog *pp; Reprog *pp;
/* get memory for the program */ /* get memory for the program */
pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
if(pp == 0){ if(pp == 0){
regerror("out of memory"); regerror("out of memory");
return 0; return 0;

View file

@ -68,7 +68,7 @@ struct Reljunk
Rune* reol; Rune* reol;
}; };
extern Relist* _renewthread(Relist*, Reinst*, Resublist*); extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
extern void _renewmatch(Resub*, int, Resublist*); extern void _renewmatch(Resub*, int, Resublist*);
extern Relist* _renewemptythread(Relist*, Reinst*, char*); extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*); extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);

View file

@ -10,5 +10,5 @@ regerror(char *s)
strcat(buf, s); strcat(buf, s);
strcat(buf, "\n"); strcat(buf, "\n");
write(2, buf, strlen(buf)); write(2, buf, strlen(buf));
exit(1); exits("regerr");
} }

View file

@ -48,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
switch(j->starttype) { switch(j->starttype) {
case RUNE: case RUNE:
p = utfrune(s, j->startchar); p = utfrune(s, j->startchar);
if(p == 0) if(p == 0 || s == j->eol)
return match; return match;
s = p; s = p;
break; break;
@ -56,14 +56,14 @@ regexec1(Reprog *progp, /* program to run */
if(s == bol) if(s == bol)
break; break;
p = utfrune(s, '\n'); p = utfrune(s, '\n');
if(p == 0) if(p == 0 || s == j->eol)
return match; return match;
s = p; s = p;
break; break;
} }
} }
r = *(uchar*)s; r = *(uchar*)s;
if(r < (Rune)Runeself) if(r < Runeself)
n = 1; n = 1;
else else
n = chartorune(&r, s); n = chartorune(&r, s);
@ -77,7 +77,7 @@ regexec1(Reprog *progp, /* program to run */
/* Add first instruction to current list */ /* Add first instruction to current list */
if(match == 0) if(match == 0)
_renewemptythread(tl, progp->startinst, s); _renewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */ /* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
@ -85,7 +85,7 @@ regexec1(Reprog *progp, /* program to run */
switch(inst->type){ switch(inst->type){
case RUNE: /* regular character */ case RUNE: /* regular character */
if(inst->u1.r == r){ if(inst->u1.r == r){
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
} }
break; break;
@ -97,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
continue; continue;
case ANY: case ANY:
if(r != '\n') if(r != '\n')
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case ANYNL: case ANYNL:
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case BOL: case BOL:
@ -116,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end; ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){ if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
} }
@ -127,12 +127,12 @@ regexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1]) if(r >= rp[0] && r <= rp[1])
break; break;
if(rp == ep) if(rp == ep)
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case OR: case OR:
/* evaluate right choice later */ /* evaluate right choice later */
if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
return -1; return -1;
/* efficiency: advance and re-evaluate */ /* efficiency: advance and re-evaluate */
continue; continue;
@ -162,15 +162,27 @@ regexec2(Reprog *progp, /* program to run */
Reljunk *j Reljunk *j
) )
{ {
Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE]; int rv;
Relist *relist0, *relist1;
/* mark space */ /* mark space */
/*
 * Both thread lists are allocated with malloc; report failure with -1
 * if either allocation fails.
 */
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist0 == nil)
	return -1;
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist1 == nil){
	/* relist1 is nil here; the buffer that must be released is relist0 */
	free(relist0);
	return -1;
}
j->relist[0] = relist0; j->relist[0] = relist0;
j->relist[1] = relist1; j->relist[1] = relist1;
j->reliste[0] = relist0 + nelem(relist0) - 2; j->reliste[0] = relist0 + BIGLISTSIZE - 2;
j->reliste[1] = relist1 + nelem(relist1) - 2; j->reliste[1] = relist1 + BIGLISTSIZE - 2;
return regexec1(progp, bol, mp, ms, j); rv = regexec1(progp, bol, mp, ms, j);
free(relist0);
free(relist1);
return rv;
} }
extern int extern int
@ -196,7 +208,7 @@ regexec(Reprog *progp, /* program to run */
} }
j.starttype = 0; j.starttype = 0;
j.startchar = 0; j.startchar = 0;
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
j.starttype = RUNE; j.starttype = RUNE;
j.startchar = progp->startinst->u1.r; j.startchar = progp->startinst->u1.r;
} }

View file

@ -53,9 +53,10 @@ regsub(char *sp, /* source string */
ssp < mp[0].e.ep; ssp++) ssp < mp[0].e.ep; ssp++)
if(dp < ep) if(dp < ep)
*dp++ = *ssp; *dp++ = *ssp;
}else }else{
if(dp < ep) if(dp < ep)
*dp++ = *sp; *dp++ = *sp;
}
sp++; sp++;
} }
*dp = '\0'; *dp = '\0';

View file

@ -1,26 +0,0 @@
#include "lib9.h"
#include "regexp9.h"
#include "regcomp.h"
/*
 * Add an "empty" thread (no subexpression matches yet) for
 * instruction ip to the list lp, recording rsp as the start
 * pointer of m[0].
 *
 * Returns 0 when lp already contains a thread for ip (that entry is
 * reset only if rsp is lower than its recorded start); otherwise
 * returns a pointer to the new list terminator that follows the
 * freshly added entry.
 */
extern Relist*
_rrenewemptythread(Relist *lp,	/* _relist to add to */
	Reinst *ip,		/* instruction to add */
	Rune *rsp)		/* start pointer to record in m[0] */
{
	Relist *slot;

	for(slot = lp; slot->inst != 0; slot++){
		if(slot->inst != ip)
			continue;
		/* existing thread for ip: leave it alone unless rsp is lower */
		if(!(rsp < slot->se.m[0].s.rsp))
			return 0;
		memset(&slot->se, 0, sizeof(slot->se));
		slot->se.m[0].s.rsp = rsp;
		return 0;
	}

	/* slot is the terminator: turn it into the new thread */
	slot->inst = ip;
	memset(&slot->se, 0, sizeof(slot->se));
	slot->se.m[0].s.rsp = rsp;

	/* write a new terminator after it and hand that back */
	slot++;
	slot->inst = 0;
	return slot;
}

View file

@ -45,7 +45,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(j->starttype) { switch(j->starttype) {
case RUNE: case RUNE:
while(*s != j->startchar) { while(*s != j->startchar) {
if(*s == 0) if(*s == 0 || s == j->reol)
return match; return match;
s++; s++;
} }
@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
if(s == bol) if(s == bol)
break; break;
while(*s != '\n') { while(*s != '\n') {
if(*s == 0) if(*s == 0 || s == j->reol)
return match; return match;
s++; s++;
} }
@ -72,7 +72,7 @@ rregexec1(Reprog *progp, /* program to run */
nl->inst = 0; nl->inst = 0;
/* Add first instruction to current list */ /* Add first instruction to current list */
_rrenewemptythread(tl, progp->startinst, s); _rrenewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */ /* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ for(tlp=tl; tlp->inst; tlp++){
@ -80,7 +80,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(inst->type){ switch(inst->type){
case RUNE: /* regular character */ case RUNE: /* regular character */
if(inst->u1.r == r) if(inst->u1.r == r)
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case LBRA: case LBRA:
@ -91,11 +91,11 @@ rregexec1(Reprog *progp, /* program to run */
continue; continue;
case ANY: case ANY:
if(r != '\n') if(r != '\n')
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case ANYNL: case ANYNL:
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case BOL: case BOL:
@ -110,7 +110,7 @@ rregexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end; ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){ if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
} }
@ -121,12 +121,12 @@ rregexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1]) if(r >= rp[0] && r <= rp[1])
break; break;
if(rp == ep) if(rp == ep)
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1; return -1;
break; break;
case OR: case OR:
/* evaluate right choice later */ /* evaluate right choice later */
if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
return -1; return -1;
/* efficiency: advance and re-evaluate */ /* efficiency: advance and re-evaluate */
continue; continue;
@ -190,7 +190,7 @@ rregexec(Reprog *progp, /* program to run */
} }
j.starttype = 0; j.starttype = 0;
j.startchar = 0; j.startchar = 0;
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
j.starttype = RUNE; j.starttype = RUNE;
j.startchar = progp->startinst->u1.r; j.startchar = progp->startinst->u1.r;
} }

View file

@ -5,12 +5,14 @@
extern void extern void
rregsub(Rune *sp, /* source string */ rregsub(Rune *sp, /* source string */
Rune *dp, /* destination string */ Rune *dp, /* destination string */
int dlen,
Resub *mp, /* subexpression elements */ Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */ int ms) /* number of elements pointed to by mp */
{ {
Rune *ssp; Rune *ssp, *ep;
int i; int i;
ep = dp+(dlen/sizeof(Rune))-1;
while(*sp != '\0'){ while(*sp != '\0'){
if(*sp == '\\'){ if(*sp == '\\'){
switch(*++sp){ switch(*++sp){
@ -29,16 +31,19 @@ rregsub(Rune *sp, /* source string */
for(ssp = mp[i].s.rsp; for(ssp = mp[i].s.rsp;
ssp < mp[i].e.rep; ssp < mp[i].e.rep;
ssp++) ssp++)
*dp++ = *ssp; if(dp < ep)
*dp++ = *ssp;
break; break;
case '\\': case '\\':
*dp++ = '\\'; if(dp < ep)
*dp++ = '\\';
break; break;
case '\0': case '\0':
sp--; sp--;
break; break;
default: default:
*dp++ = *sp; if(dp < ep)
*dp++ = *sp;
break; break;
} }
}else if(*sp == '&'){ }else if(*sp == '&'){
@ -46,9 +51,12 @@ rregsub(Rune *sp, /* source string */
if(mp[0].s.rsp != 0) if(mp[0].s.rsp != 0)
for(ssp = mp[0].s.rsp; for(ssp = mp[0].s.rsp;
ssp < mp[0].e.rep; ssp++) ssp < mp[0].e.rep; ssp++)
*dp++ = *ssp; if(dp < ep)
}else *dp++ = *ssp;
*dp++ = *sp; }else{
if(dp < ep)
*dp++ = *sp;
}
sp++; sp++;
} }
*dp = '\0'; *dp = '\0';