Update libregexp with recent bug fixes from Plan 9.
This commit is contained in:
parent
4da83e7cce
commit
62390091b7
12 changed files with 111 additions and 81 deletions
|
|
@ -1,6 +1,2 @@
|
|||
#include <fmt.h>
|
||||
#include <setjmp.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
|
|
|
|||
6
src/libregexp/lib9.h.std
Normal file
6
src/libregexp/lib9.h.std
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#include <fmt.h>
|
||||
#include <setjmp.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
|
@ -9,7 +9,6 @@ OFILES=\
|
|||
regexec.$O\
|
||||
regsub.$O\
|
||||
regaux.$O\
|
||||
rregaux.$O\
|
||||
rregexec.$O\
|
||||
rregsub.$O\
|
||||
|
||||
|
|
|
|||
|
|
@ -30,19 +30,27 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
|
|||
extern Relist*
|
||||
_renewthread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
Resublist *sep) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if((sep)->m[0].s.sp < p->se.m[0].s.sp)
|
||||
if(sep->m[0].s.sp < p->se.m[0].s.sp){
|
||||
if(ms > 1)
|
||||
p->se = *sep;
|
||||
else
|
||||
p->se.m[0] = sep->m[0];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
if(ms > 1)
|
||||
p->se = *sep;
|
||||
else
|
||||
p->se.m[0] = sep->m[0];
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
|
@ -54,6 +62,7 @@ _renewthread(Relist *lp, /* _relist to add to */
|
|||
extern Relist*
|
||||
_renewemptythread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
char *sp) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
|
@ -61,16 +70,43 @@ _renewemptythread(Relist *lp, /* _relist to add to */
|
|||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(sp < p->se.m[0].s.sp) {
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
extern Relist*
|
||||
_rrenewemptythread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
Rune *rsp) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(rsp < p->se.m[0].s.rsp) {
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,6 @@ struct Node
|
|||
Reinst* last;
|
||||
}Node;
|
||||
|
||||
Reprog RePrOg;
|
||||
|
||||
#define NSTACK 20
|
||||
static Node andstack[NSTACK];
|
||||
static Node *andp;
|
||||
|
|
@ -252,7 +250,7 @@ optimize(Reprog *pp)
|
|||
* and then relocate the code.
|
||||
*/
|
||||
size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
|
||||
npp = (Reprog *)realloc(pp, size);
|
||||
npp = realloc(pp, size);
|
||||
if(npp==0 || npp==pp)
|
||||
return pp;
|
||||
diff = (char *)npp - (char *)pp;
|
||||
|
|
@ -303,12 +301,12 @@ dump(Reprog *pp)
|
|||
print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
|
||||
l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
|
||||
if(l->type == RUNE)
|
||||
print("\t%C\n", l->r);
|
||||
print("\t%C\n", l->u1.r);
|
||||
else if(l->type == CCLASS || l->type == NCCLASS){
|
||||
print("\t[");
|
||||
if(l->type == NCCLASS)
|
||||
print("^");
|
||||
for(p = l->cp->spans; p < l->cp->end; p += 2)
|
||||
for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
|
||||
if(p[0] == p[1])
|
||||
print("%C", p[0]);
|
||||
else
|
||||
|
|
@ -477,7 +475,7 @@ regcomp1(char *s, int literal, int dot_type)
|
|||
Reprog *pp;
|
||||
|
||||
/* get memory for the program */
|
||||
pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
|
||||
pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
|
||||
if(pp == 0){
|
||||
regerror("out of memory");
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ struct Reljunk
|
|||
Rune* reol;
|
||||
};
|
||||
|
||||
extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
|
||||
extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
|
||||
extern void _renewmatch(Resub*, int, Resublist*);
|
||||
extern Relist* _renewemptythread(Relist*, Reinst*, char*);
|
||||
extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*);
|
||||
extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
|
||||
extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
|
||||
|
|
|
|||
|
|
@ -10,5 +10,5 @@ regerror(char *s)
|
|||
strcat(buf, s);
|
||||
strcat(buf, "\n");
|
||||
write(2, buf, strlen(buf));
|
||||
exit(1);
|
||||
exits("regerr");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = utfrune(s, j->startchar);
|
||||
if(p == 0)
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
|
|
@ -56,14 +56,14 @@ regexec1(Reprog *progp, /* program to run */
|
|||
if(s == bol)
|
||||
break;
|
||||
p = utfrune(s, '\n');
|
||||
if(p == 0)
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
r = *(uchar*)s;
|
||||
if(r < (Rune)Runeself)
|
||||
if(r < Runeself)
|
||||
n = 1;
|
||||
else
|
||||
n = chartorune(&r, s);
|
||||
|
|
@ -77,7 +77,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
|
||||
/* Add first instruction to current list */
|
||||
if(match == 0)
|
||||
_renewemptythread(tl, progp->startinst, s);
|
||||
_renewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
|
||||
|
|
@ -85,7 +85,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->u1.r == r){
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
|
@ -97,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
|
|||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
|
|
@ -116,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
|
@ -127,12 +127,12 @@ regexec1(Reprog *progp, /* program to run */
|
|||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
|
||||
if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
|
|
@ -162,15 +162,27 @@ regexec2(Reprog *progp, /* program to run */
|
|||
Reljunk *j
|
||||
)
|
||||
{
|
||||
Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE];
|
||||
int rv;
|
||||
Relist *relist0, *relist1;
|
||||
|
||||
/* mark space */
|
||||
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist0 == nil)
|
||||
return -1;
|
||||
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist1 == nil){
	/*
	 * The second allocation failed, so relist1 is nil and there is
	 * nothing of it to free.  Release relist0, which was allocated
	 * just above and would otherwise leak on this error return.
	 */
	free(relist0);
	return -1;
}
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j->reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
j->reliste[0] = relist0 + BIGLISTSIZE - 2;
|
||||
j->reliste[1] = relist1 + BIGLISTSIZE - 2;
|
||||
|
||||
return regexec1(progp, bol, mp, ms, j);
|
||||
rv = regexec1(progp, bol, mp, ms, j);
|
||||
free(relist0);
|
||||
free(relist1);
|
||||
return rv;
|
||||
}
|
||||
|
||||
extern int
|
||||
|
|
@ -196,7 +208,7 @@ regexec(Reprog *progp, /* program to run */
|
|||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->u1.r;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,9 +53,10 @@ regsub(char *sp, /* source string */
|
|||
ssp < mp[0].e.ep; ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
}else
|
||||
}else{
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
}
|
||||
sp++;
|
||||
}
|
||||
*dp = '\0';
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
extern Relist*
|
||||
_rrenewemptythread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
Rune *rsp) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(rsp < p->se.m[0].s.rsp) {
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
|
@ -45,7 +45,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
while(*s != j->startchar) {
|
||||
if(*s == 0)
|
||||
if(*s == 0 || s == j->reol)
|
||||
return match;
|
||||
s++;
|
||||
}
|
||||
|
|
@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
if(s == bol)
|
||||
break;
|
||||
while(*s != '\n') {
|
||||
if(*s == 0)
|
||||
if(*s == 0 || s == j->reol)
|
||||
return match;
|
||||
s++;
|
||||
}
|
||||
|
|
@ -72,7 +72,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
nl->inst = 0;
|
||||
|
||||
/* Add first instruction to current list */
|
||||
_rrenewemptythread(tl, progp->startinst, s);
|
||||
_rrenewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){
|
||||
|
|
@ -80,7 +80,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->u1.r == r)
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case LBRA:
|
||||
|
|
@ -91,11 +91,11 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
|
|
@ -110,7 +110,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
|
@ -121,12 +121,12 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
|
||||
if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
|
|
@ -190,7 +190,7 @@ rregexec(Reprog *progp, /* program to run */
|
|||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->u1.r;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,12 +5,14 @@
|
|||
extern void
|
||||
rregsub(Rune *sp, /* source string */
|
||||
Rune *dp, /* destination string */
|
||||
int dlen,
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements pointed to by mp */
|
||||
{
|
||||
Rune *ssp;
|
||||
Rune *ssp, *ep;
|
||||
int i;
|
||||
|
||||
ep = dp+(dlen/sizeof(Rune))-1;
|
||||
while(*sp != '\0'){
|
||||
if(*sp == '\\'){
|
||||
switch(*++sp){
|
||||
|
|
@ -29,15 +31,18 @@ rregsub(Rune *sp, /* source string */
|
|||
for(ssp = mp[i].s.rsp;
|
||||
ssp < mp[i].e.rep;
|
||||
ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
break;
|
||||
case '\\':
|
||||
if(dp < ep)
|
||||
*dp++ = '\\';
|
||||
break;
|
||||
case '\0':
|
||||
sp--;
|
||||
break;
|
||||
default:
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
}
|
||||
|
|
@ -46,9 +51,12 @@ rregsub(Rune *sp, /* source string */
|
|||
if(mp[0].s.rsp != 0)
|
||||
for(ssp = mp[0].s.rsp;
|
||||
ssp < mp[0].e.rep; ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
}else
|
||||
}else{
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
}
|
||||
sp++;
|
||||
}
|
||||
*dp = '\0';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue