plan9port/src/cmd/tcs/html.c

472 lines
7.5 KiB
C
Raw Normal View History

2005-12-27 23:16:48 +00:00
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "hdr.h"
#include "conv.h"
/*
 * One row of the entity tables: an HTML character entity
 * name paired with the rune it denotes.
 */
typedef struct Hchar {
	char *s;	/* entity name; table entries may carry a leading '_' marker */
	Rune r;		/* rune the name stands for */
} Hchar;
/* &lt;, &gt;, &quot;, &amp; intentionally omitted */
/*
* Names beginning with _ are names we recognize
* (without the underscore) but will not generate,
* because they are nonstandard.
*/
2005-12-27 23:16:48 +00:00
static Hchar byname[] =
{
{"AElig", 198},
{"Aacute", 193},
{"Acirc", 194},
{"Agrave", 192},
2006-05-21 18:57:51 +00:00
{"Alpha", 913},
2005-12-27 23:16:48 +00:00
{"Aring", 197},
{"Atilde", 195},
{"Auml", 196},
2006-05-21 18:57:51 +00:00
{"Beta", 914},
2005-12-27 23:16:48 +00:00
{"Ccedil", 199},
2006-05-21 18:57:51 +00:00
{"Chi", 935},
{"Dagger", 8225},
{"Delta", 916},
2005-12-27 23:16:48 +00:00
{"ETH", 208},
{"Eacute", 201},
{"Ecirc", 202},
{"Egrave", 200},
2006-05-21 18:57:51 +00:00
{"Epsilon", 917},
{"Eta", 919},
2005-12-27 23:16:48 +00:00
{"Euml", 203},
2006-05-21 18:57:51 +00:00
{"Gamma", 915},
2005-12-27 23:16:48 +00:00
{"Iacute", 205},
{"Icirc", 206},
{"Igrave", 204},
2006-05-21 18:57:51 +00:00
{"Iota", 921},
2005-12-27 23:16:48 +00:00
{"Iuml", 207},
2006-05-21 18:57:51 +00:00
{"Kappa", 922},
{"Lambda", 923},
{"Mu", 924},
2005-12-27 23:16:48 +00:00
{"Ntilde", 209},
2006-05-21 18:57:51 +00:00
{"Nu", 925},
{"OElig", 338},
2005-12-27 23:16:48 +00:00
{"Oacute", 211},
{"Ocirc", 212},
{"Ograve", 210},
2006-05-21 18:57:51 +00:00
{"Omega", 937},
{"Omicron", 927},
2005-12-27 23:16:48 +00:00
{"Oslash", 216},
{"Otilde", 213},
{"Ouml", 214},
2006-05-21 18:57:51 +00:00
{"Phi", 934},
{"Pi", 928},
{"Prime", 8243},
{"Psi", 936},
{"Rho", 929},
{"Scaron", 352},
{"Sigma", 931},
2005-12-27 23:16:48 +00:00
{"THORN", 222},
2006-05-21 18:57:51 +00:00
{"Tau", 932},
{"Theta", 920},
2005-12-27 23:16:48 +00:00
{"Uacute", 218},
{"Ucirc", 219},
{"Ugrave", 217},
2006-05-21 18:57:51 +00:00
{"Upsilon", 933},
2005-12-27 23:16:48 +00:00
{"Uuml", 220},
2006-05-21 18:57:51 +00:00
{"Xi", 926},
2005-12-27 23:16:48 +00:00
{"Yacute", 221},
2006-05-21 18:57:51 +00:00
{"Yuml", 376},
{"Zeta", 918},
2005-12-27 23:16:48 +00:00
{"aacute", 225},
{"acirc", 226},
{"acute", 180},
{"aelig", 230},
{"agrave", 224},
2006-05-21 18:57:51 +00:00
{"alefsym", 8501},
2005-12-27 23:16:48 +00:00
{"alpha", 945},
2006-05-21 18:57:51 +00:00
{"amp", 38},
{"and", 8743},
{"ang", 8736},
2005-12-27 23:16:48 +00:00
{"aring", 229},
2006-05-21 18:57:51 +00:00
{"asymp", 8776},
2005-12-27 23:16:48 +00:00
{"atilde", 227},
{"auml", 228},
2006-05-21 18:57:51 +00:00
{"bdquo", 8222},
2005-12-27 23:16:48 +00:00
{"beta", 946},
{"brvbar", 166},
2006-05-21 18:57:51 +00:00
{"bull", 8226},
{"cap", 8745},
2005-12-27 23:16:48 +00:00
{"ccedil", 231},
{"cdots", 8943},
{"cedil", 184},
{"cent", 162},
{"chi", 967},
2006-05-21 18:57:51 +00:00
{"circ", 710},
{"clubs", 9827},
{"cong", 8773},
2005-12-27 23:16:48 +00:00
{"copy", 169},
2006-05-21 18:57:51 +00:00
{"crarr", 8629},
{"cup", 8746},
2005-12-27 23:16:48 +00:00
{"curren", 164},
2006-05-21 18:57:51 +00:00
{"dArr", 8659},
{"dagger", 8224},
{"darr", 8595},
2005-12-27 23:16:48 +00:00
{"ddots", 8945},
{"deg", 176},
{"delta", 948},
2006-05-21 18:57:51 +00:00
{"diams", 9830},
2005-12-27 23:16:48 +00:00
{"divide", 247},
{"eacute", 233},
{"ecirc", 234},
{"egrave", 232},
{"_emdash", 8212}, /* non-standard but commonly used */
2006-05-21 18:57:51 +00:00
{"empty", 8709},
2005-12-27 23:16:48 +00:00
{"emsp", 8195},
{"_endash", 8211}, /* non-standard but commonly used */
2005-12-27 23:16:48 +00:00
{"ensp", 8194},
{"epsilon", 949},
2006-05-21 18:57:51 +00:00
{"equiv", 8801},
2005-12-27 23:16:48 +00:00
{"eta", 951},
{"eth", 240},
{"euml", 235},
2006-05-21 18:57:51 +00:00
{"euro", 8364},
{"exist", 8707},
{"fnof", 402},
{"forall", 8704},
2005-12-27 23:16:48 +00:00
{"frac12", 189},
{"frac14", 188},
{"frac34", 190},
2006-05-21 18:57:51 +00:00
{"frasl", 8260},
2005-12-27 23:16:48 +00:00
{"gamma", 947},
2006-05-21 18:57:51 +00:00
{"ge", 8805},
{"gt", 62},
{"hArr", 8660},
{"harr", 8596},
{"hearts", 9829},
{"hellip", 8230},
2005-12-27 23:16:48 +00:00
{"iacute", 237},
{"icirc", 238},
{"iexcl", 161},
{"igrave", 236},
2006-05-21 18:57:51 +00:00
{"image", 8465},
{"infin", 8734},
{"int", 8747},
2005-12-27 23:16:48 +00:00
{"iota", 953},
{"iquest", 191},
2006-05-21 18:57:51 +00:00
{"isin", 8712},
2005-12-27 23:16:48 +00:00
{"iuml", 239},
{"kappa", 954},
2006-05-21 18:57:51 +00:00
{"lArr", 8656},
2005-12-27 23:16:48 +00:00
{"lambda", 955},
2006-05-21 18:57:51 +00:00
{"lang", 9001},
2005-12-27 23:16:48 +00:00
{"laquo", 171},
2006-05-21 18:57:51 +00:00
{"larr", 8592},
{"lceil", 8968},
{"_ldots", 8230},
2006-05-21 18:57:51 +00:00
{"ldquo", 8220},
{"le", 8804},
{"lfloor", 8970},
{"lowast", 8727},
{"loz", 9674},
{"lrm", 8206},
{"lsaquo", 8249},
2005-12-27 23:16:48 +00:00
{"lsquo", 8216},
2006-05-21 18:57:51 +00:00
{"lt", 60},
2005-12-27 23:16:48 +00:00
{"macr", 175},
{"mdash", 8212},
{"micro", 181},
{"middot", 183},
2006-05-21 18:57:51 +00:00
{"minus", 8722},
2005-12-27 23:16:48 +00:00
{"mu", 956},
2006-05-21 18:57:51 +00:00
{"nabla", 8711},
2005-12-27 23:16:48 +00:00
{"nbsp", 160},
{"ndash", 8211},
2006-05-21 18:57:51 +00:00
{"ne", 8800},
{"ni", 8715},
2005-12-27 23:16:48 +00:00
{"not", 172},
2006-05-21 18:57:51 +00:00
{"notin", 8713},
{"nsub", 8836},
2005-12-27 23:16:48 +00:00
{"ntilde", 241},
{"nu", 957},
{"oacute", 243},
{"ocirc", 244},
2006-05-21 18:57:51 +00:00
{"oelig", 339},
2005-12-27 23:16:48 +00:00
{"ograve", 242},
2006-05-21 18:57:51 +00:00
{"oline", 8254},
2005-12-27 23:16:48 +00:00
{"omega", 969},
{"omicron", 959},
2006-05-21 18:57:51 +00:00
{"oplus", 8853},
{"or", 8744},
2005-12-27 23:16:48 +00:00
{"ordf", 170},
{"ordm", 186},
{"oslash", 248},
{"otilde", 245},
2006-05-21 18:57:51 +00:00
{"otimes", 8855},
2005-12-27 23:16:48 +00:00
{"ouml", 246},
{"para", 182},
2006-05-21 18:57:51 +00:00
{"part", 8706},
{"permil", 8240},
{"perp", 8869},
2005-12-27 23:16:48 +00:00
{"phi", 966},
{"pi", 960},
2006-05-21 18:57:51 +00:00
{"piv", 982},
2005-12-27 23:16:48 +00:00
{"plusmn", 177},
{"pound", 163},
2006-05-21 18:57:51 +00:00
{"prime", 8242},
{"prod", 8719},
{"prop", 8733},
2005-12-27 23:16:48 +00:00
{"psi", 968},
{"quad", 8193},
2006-05-21 18:57:51 +00:00
{"quot", 34},
{"rArr", 8658},
{"radic", 8730},
{"rang", 9002},
2005-12-27 23:16:48 +00:00
{"raquo", 187},
2006-05-21 18:57:51 +00:00
{"rarr", 8594},
{"rceil", 8969},
2005-12-27 23:16:48 +00:00
{"rdquo", 8221},
2006-05-21 18:57:51 +00:00
{"real", 8476},
2005-12-27 23:16:48 +00:00
{"reg", 174},
2006-05-21 18:57:51 +00:00
{"rfloor", 8971},
2005-12-27 23:16:48 +00:00
{"rho", 961},
2006-05-21 18:57:51 +00:00
{"rlm", 8207},
{"rsaquo", 8250},
2005-12-27 23:16:48 +00:00
{"rsquo", 8217},
2006-05-21 18:57:51 +00:00
{"sbquo", 8218},
{"scaron", 353},
{"sdot", 8901},
2005-12-27 23:16:48 +00:00
{"sect", 167},
{"shy", 173},
{"sigma", 963},
2006-05-21 18:57:51 +00:00
{"sigmaf", 962},
{"sim", 8764},
{"_sp", 8194},
2006-05-21 18:57:51 +00:00
{"spades", 9824},
{"sub", 8834},
{"sube", 8838},
{"sum", 8721},
{"sup", 8835},
2005-12-27 23:16:48 +00:00
{"sup1", 185},
{"sup2", 178},
{"sup3", 179},
2006-05-21 18:57:51 +00:00
{"supe", 8839},
2005-12-27 23:16:48 +00:00
{"szlig", 223},
{"tau", 964},
2006-05-21 18:57:51 +00:00
{"there4", 8756},
2005-12-27 23:16:48 +00:00
{"theta", 952},
2006-05-21 18:57:51 +00:00
{"thetasym", 977},
2005-12-27 23:16:48 +00:00
{"thinsp", 8201},
{"thorn", 254},
2006-05-21 18:57:51 +00:00
{"tilde", 732},
2005-12-27 23:16:48 +00:00
{"times", 215},
{"trade", 8482},
2006-05-21 18:57:51 +00:00
{"uArr", 8657},
2005-12-27 23:16:48 +00:00
{"uacute", 250},
2006-05-21 18:57:51 +00:00
{"uarr", 8593},
2005-12-27 23:16:48 +00:00
{"ucirc", 251},
{"ugrave", 249},
{"uml", 168},
2006-05-21 18:57:51 +00:00
{"upsih", 978},
2005-12-27 23:16:48 +00:00
{"upsilon", 965},
{"uuml", 252},
{"_varepsilon", 8712},
2005-12-27 23:16:48 +00:00
{"varphi", 981},
{"_varpi", 982},
2005-12-27 23:16:48 +00:00
{"varrho", 1009},
{"vdots", 8942},
{"_vsigma", 962},
{"_vtheta", 977},
2006-05-21 18:57:51 +00:00
{"weierp", 8472},
2005-12-27 23:16:48 +00:00
{"xi", 958},
{"yacute", 253},
{"yen", 165},
{"yuml", 255},
2006-05-21 18:57:51 +00:00
{"zeta", 950},
{"zwj", 8205},
{"zwnj", 8204}
2005-12-27 23:16:48 +00:00
};
static Hchar byrune[nelem(byname)];
static int
hnamecmp(const void *va, const void *vb)
{
Hchar *a, *b;
a = (Hchar*)va;
b = (Hchar*)vb;
return strcmp(a->s, b->s);
}
static int
hrunecmp(const void *va, const void *vb)
{
Hchar *a, *b;
a = (Hchar*)va;
b = (Hchar*)vb;
return a->r - b->r;
}
static void
html_init(void)
{
static int init;
int i;
2005-12-27 23:16:48 +00:00
if(init)
return;
init = 1;
memmove(byrune, byname, sizeof byrune);
/* Eliminate names we aren't allowed to generate. */
for(i=0; i<nelem(byrune); i++){
if(byrune[i].s[0] == '_'){
byrune[i].r = Runeerror;
byname[i].s++;
}
}
2005-12-27 23:16:48 +00:00
qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
}
/*
 * Binary search of byname for the entity name s (given
 * without the surrounding '&' and ';').  Returns the rune
 * for that name, or Runeerror if the name is not in the table.
 * byname must already have been sorted by html_init.
 */
static Rune
findbyname(char *s)
{
	int lo, hi, mid, cmp;

	lo = 0;
	hi = nelem(byname);
	while(lo < hi){
		mid = lo + (hi-lo)/2;
		cmp = strcmp(byname[mid].s, s);
		if(cmp == 0)
			return byname[mid].r;
		if(cmp < 0)
			lo = mid+1;
		else
			hi = mid;
	}
	return Runeerror;
}
/*
 * Binary search of byrune for the rune r.  Returns the entity
 * name for r, or nil if there is none.  Runeerror is never
 * looked up, because html_init uses it to mark the entries
 * that must not be generated.
 * byrune must already have been sorted by html_init.
 */
static char*
findbyrune(Rune r)
{
	int lo, hi, mid;

	if(r == Runeerror)
		return nil;

	lo = 0;
	hi = nelem(byrune);
	while(lo < hi){
		mid = lo + (hi-lo)/2;
		if(byrune[mid].r == r)
			return byrune[mid].s;
		if(byrune[mid].r < r)
			lo = mid+1;
		else
			hi = mid;
	}
	return nil;
}
/*
 * Read HTML text from fd, replace character references by
 * the runes they denote, and hand the resulting runes to out
 * through OUT in chunks of at most N runes.  A final OUT call
 * with a count of 0 is made after the input is exhausted.
 *
 * Recognized references, each terminated by ';':
 *	&name;		a name found by findbyname
 *	&#ddd;		decimal rune value
 *	&#xhhh; &#Xhhh;	hexadecimal rune value
 * Numeric values must lie in [0, NRUNE).  Anything else that
 * starts with '&' is passed through unchanged.
 *
 * x is unused.
 */
void
html_in(int fd, long *x, struct convert *out)
{
	char buf[100], *p, *digits;
	Biobuf b;
	Rune rbuf[N];
	Rune *r, *er;
	int c, i, base;
	long v;

	USED(x);

	html_init();
	r = rbuf;
	er = rbuf+N;
	Binit(&b, fd, OREAD);
	while((c = Bgetrune(&b)) != Beof){
		if(r >= er){
			OUT(out, rbuf, r-rbuf);
			r = rbuf;
		}
		if(c == '&'){
			/*
			 * Collect the candidate reference, '&' included,
			 * up to and including the first ';' or white space,
			 * stopping early at end of input or when buf is full.
			 */
			buf[0] = c;
			for(i=1; i<nelem(buf)-1;){
				c = Bgetc(&b);
				if(c == Beof)
					break;
				buf[i++] = c;
				if(strchr("; \t\r\n", c))
					break;
			}
			buf[i] = 0;
			if(buf[i-1] == ';'){
				/* drop the ';' for the name lookup, then restore it */
				buf[i-1] = 0;
				if((c = findbyname(buf+1)) != Runeerror){
					*r++ = c;
					continue;
				}
				buf[i-1] = ';';
				if(buf[1] == '#'){
					if(buf[2] == 'x' || buf[2] == 'X'){
						digits = buf+3;
						base = 16;
					}else{
						digits = buf+2;
						base = 10;
					}
					/*
					 * Keep the value in a long so that an
					 * oversized number cannot be truncated
					 * into the valid range, and insist on at
					 * least one digit and no sign so that
					 * "&#;" or "&#+65;" is not decoded.
					 */
					v = strtol(digits, &p, base);
					if(p == digits || *digits == '+' || *digits == '-'
					|| *p != ';' || v < 0 || v >= NRUNE)
						goto bad;
					*r++ = v;
					continue;
				}
			}
		bad:
			/* not a reference we understand: copy the text through */
			for(p=buf; p<buf+i; ){
				p += chartorune(r++, p);
				if(r >= er){
					OUT(out, rbuf, r-rbuf);
					r = rbuf;
				}
			}
			continue;
		}
		*r++ = c;
	}
	if(r > rbuf)
		OUT(out, rbuf, r-rbuf);
	OUT(out, rbuf, 0);
}
/*
 * Write the n runes at r to file descriptor 1 as HTML text.
 * Runes below Runeself are written as they are; any other
 * rune is written as &name; when findbyrune knows a name for
 * it, and as a decimal reference &#ddd; otherwise.
 *
 * use biobuf because can use more than UTFmax bytes per rune
 *
 * x is unused.
 */
void
html_out(Rune *r, int n, long *x)
{
	char *s;
	Biobuf b;
	Rune *er;

	USED(x);

	html_init();
	Binit(&b, 1, OWRITE);
	er = r+n;
	for(; r<er; r++){
		if(*r < Runeself)
			Bputrune(&b, *r);
		else if((s = findbyrune(*r)) != nil)
			Bprint(&b, "&%s;", s);
		else
			Bprint(&b, "&#%d;", *r);
	}
	/*
	 * b lives on this stack frame, so finish with Bterm rather
	 * than Bflush: it flushes the pending output and also ends
	 * libbio's use of the buffer before the frame goes away.
	 * NOTE(review): relies on Bterm leaving fd 1 open for a
	 * buffer set up with Binit; confirm against bio(3).
	 */
	Bterm(&b);
}