rc: handle 4-byte utf-8

R=rsc
http://codereview.appspot.com/3833043
This commit is contained in:
Russ Cox 2011-01-02 13:44:15 -05:00
parent 72f66c2d3c
commit 0786c9647c
3 changed files with 20 additions and 0 deletions

View file

@ -125,6 +125,17 @@ equtf(char *p, char *q)
return 1; /* broken code at end of string! */ return 1; /* broken code at end of string! */
return p[2]==q[2]; return p[2]==q[2];
} }
if(fourbyte(*p)){
if(p[1]!=q[1])
return 0;
if(p[1]=='\0')
return 1;
if(p[2]!=q[2])
return 0;
if(p[2]=='\0')
return 1;
return p[3]==q[3];
}
return 1; return 1;
} }
/* /*
@ -137,6 +148,7 @@ nextutf(char *p)
{ {
if(twobyte(*p)) return p[1]=='\0'?p+1:p+2; if(twobyte(*p)) return p[1]=='\0'?p+1:p+2;
if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3; if(threebyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p+3;
if(fourbyte(*p)) return p[1]=='\0'?p+1:p[2]=='\0'?p+2:p[3]=='\0'?p+3:p+4;
return p+1; return p+1;
} }
/* /*
@ -149,6 +161,7 @@ unicode(char *p)
int u=*p&0xff; int u=*p&0xff;
if(twobyte(u)) return ((u&0x1f)<<6)|(p[1]&0x3f); if(twobyte(u)) return ((u&0x1f)<<6)|(p[1]&0x3f);
if(threebyte(u)) return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f); if(threebyte(u)) return (u<<12)|((p[1]&0x3f)<<6)|(p[2]&0x3f);
if(fourbyte(u)) return (u<<18)|((p[1]&0x3f)<<12)|((p[2]&0x3f)<<6)|(p[3]&0x3f);
return u; return u;
} }
/* /*

View file

@ -173,6 +173,11 @@ addutf(char *p, int c)
p = addtok(p, advance()); p = addtok(p, advance());
return addtok(p, advance()); return addtok(p, advance());
} }
if(fourbyte(c)){ /* 4-byte escape */
p = addtok(p, advance());
p = addtok(p, advance());
return addtok(p, advance());
}
return p; return p;
} }
int lastdol; /* was the last token read '$' or '$#' or '"'? */ int lastdol; /* was the last token read '$' or '$#' or '"'? */

View file

@ -121,6 +121,8 @@ int mypid;
#define onebyte(c) ((c&0x80)==0x00) #define onebyte(c) ((c&0x80)==0x00)
#define twobyte(c) ((c&0xe0)==0xc0) #define twobyte(c) ((c&0xe0)==0xc0)
#define threebyte(c) ((c&0xf0)==0xe0) #define threebyte(c) ((c&0xf0)==0xe0)
/*
 * True when c is the lead byte of a 4-byte UTF-8 sequence (bit pattern 11110xxx).
 * The argument is parenthesized so that an expression such as a|b is masked
 * as a whole rather than being split by operator precedence.
 */
#define fourbyte(c) (((c)&0xf8)==0xf0)
char **argp; char **argp;
char **args; char **args;
int nerror; /* number of errors encountered during compilation */ int nerror; /* number of errors encountered during compilation */