More & names and numbers.

This commit is contained in:
rsc 2005-09-30 17:45:40 +00:00
parent 01a1c31a7d
commit 431e32de9b

View file

@ -333,7 +333,9 @@ AsciiInt _chartab[] = {
{"kappa", 954},
{"lambda", 955},
{"laquo", 171},
{"ldquo", 8220},
{"ldots", 8230},
{"lsquo", 8216},
{"lt", 60},
{"macr", 175},
{"mdash", 8212},
@ -364,8 +366,10 @@ AsciiInt _chartab[] = {
{"quad", 8193},
{"quot", 34},
{"raquo", 187},
{"rdquo", 8221},
{"reg", 174},
{"rho", 961},
{"rsquo", 8217},
{"sect", 167},
{"shy", 173},
{"sigma", 963},
@ -809,7 +813,7 @@ gettag(TokenSource* ts, int starti, Token* a, int* pai)
// c starts a tagname
buf[0] = c;
i = 1;
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
goto eob_done;
@ -826,7 +830,7 @@ gettag(TokenSource* ts, int starti, Token* a, int* pai)
// attribute gathering loop
al = nil;
while(1) {
for(;;){
// look for "ws name" or "ws name ws = ws val" (ws=whitespace)
// skip whitespace
attrloop_continue:
@ -847,7 +851,7 @@ attrloop_continue:
if(warn)
fprint(2, "warning: expected attribute name\n");
// skip to next attribute name
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
goto eob_done;
@ -866,7 +870,7 @@ attrloop_continue:
// gather attribute name
buf[0] = c;
i = 1;
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
goto eob_done;
@ -892,7 +896,7 @@ attrloop_continue:
goto attrloop_continue;
}
//# c is '=' here; skip whitespace
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
goto eob_done;
@ -908,7 +912,7 @@ attrloop_continue:
}
val = nil;
nv = 0;
while(1) {
for(;;){
valloop_continue:
if(c < 0)
goto eob_done;
@ -918,7 +922,7 @@ valloop_continue:
// but if line ends before close quote, assume
// there was an unmatched quote
ti = ts->i;
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
goto eob_done;
@ -1053,7 +1057,7 @@ findstr(TokenSource* ts, Rune* s)
c0 = s[0];
n = runestrlen(s);
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
break;
@ -1077,6 +1081,18 @@ mainloop_done:
return 0;
}
// Convert a single hexadecimal digit character to its numeric value.
// Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0 through 15.
// Any other value of c (including a negative one) yields -1.
static int
xdigit(int c)
{
	int val;

	val = -1;
	if(c >= '0' && c <= '9')
		val = c - '0';
	else if(c >= 'a' && c <= 'f')
		val = 10 + (c - 'a');
	else if(c >= 'A' && c <= 'F')
		val = 10 + (c - 'A');
	return val;
}
// We've just read an '&'; look for an entity reference
// name, and if found, return translated char.
// if there is a complete entity name but it isn't known,
@ -1103,11 +1119,17 @@ ampersand(TokenSource* ts)
if(c == '#'){
c = getchar(ts);
v = 0;
while(c >= 0) {
if(!(c < 256 && isdigit(c)))
break;
v = v*10 + c - 48;
if(c == 'x'){
c = getchar(ts);
while((i=xdigit(c)) != -1){
v = v*16 + i;
c = getchar(ts);
}
}else{
while('0' <= c && c <= '9'){
v = v*10 + c - '0';
c = getchar(ts);
}
}
if(c >= 0){
if(!(c == ';' || c == '\n' || c == '\r'))
@ -1125,7 +1147,7 @@ ampersand(TokenSource* ts)
else if(c < 256 && isalpha(c)){
buf[0] = c;
k = 1;
while(1) {
for(;;){
c = getchar(ts);
if(c < 0)
break;