Import version of libhtml that might actually work with ANSI C.
This commit is contained in:
parent
3e3817f7c8
commit
7cf289ca89
13 changed files with 7206 additions and 0 deletions
4238
src/libhtml/build.c
Normal file
4238
src/libhtml/build.c
Normal file
File diff suppressed because it is too large
Load diff
163
src/libhtml/impl.h
Normal file
163
src/libhtml/impl.h
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
|
||||
// UTILS
typedef struct List List;
typedef struct Strlist Strlist;

// Singly linked list of ints.  The link is deliberately the first member:
// this layout also serves as the generic list shape for any node type
// that begins with a next pointer.
struct List
{
	List *next;	// following node; nil ends the list
	int val;	// integer payload
};
|
||||
|
||||
struct Strlist
|
||||
{
|
||||
Strlist* next;
|
||||
Rune* val;
|
||||
};
|
||||
|
||||
extern int _inclass(Rune c, Rune* cl);
|
||||
extern int _listlen(List* l);
|
||||
extern Rune* _ltoStr(int n);
|
||||
extern List* _newlist(int val, List* rest);
|
||||
extern Rune* _newstr(int n);
|
||||
extern int _prefix(Rune* pre, Rune* s);
|
||||
extern List* _revlist(List* l);
|
||||
extern void _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
|
||||
extern void _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
|
||||
extern int _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen);
|
||||
extern Rune* _Stradd(Rune*s1, Rune* s2, int n);
|
||||
extern Rune* _Strclass(Rune* s, Rune* cl);
|
||||
extern int _Strcmp(Rune* s1, Rune* s2);
|
||||
extern Rune* _Strdup(Rune* s);
|
||||
extern Rune* _Strdup2(Rune* s, Rune* t);
|
||||
extern int _Streqn(Rune* s1, int n1, Rune* s2);
|
||||
extern int _Strlen(Rune* s);
|
||||
extern Rune* _Strnclass(Rune* s, Rune* cl, int n);
|
||||
extern int _Strncmpci(Rune* s1, int n1, Rune* s2);
|
||||
extern Rune* _Strndup(Rune* s, int n);
|
||||
extern Rune* _Strnrclass(Rune* s, Rune* cl, int n);
|
||||
extern Rune* _Strrclass(Rune* s, Rune* cl);
|
||||
extern Rune* _Strsubstr(Rune* s, int start, int stop);
|
||||
extern long _Strtol(Rune* s, Rune** eptr, int base);
|
||||
extern void _trimwhite(Rune* s, int n, Rune** pans, int* panslen);
|
||||
|
||||
extern Rune notwhitespace[];
|
||||
extern Rune whitespace[];
|
||||
|
||||
// STRINTTAB
|
||||
typedef struct StringInt StringInt;
|
||||
|
||||
// Element of String-Int table (used for keyword lookup)
|
||||
struct StringInt
|
||||
{
|
||||
Rune* key;
|
||||
int val;
|
||||
};
|
||||
|
||||
extern int _lookup(StringInt* t, int n, Rune* key, int keylen, int* pans);
|
||||
extern StringInt* _makestrinttab(Rune** a, int n);
|
||||
extern Rune* _revlookup(StringInt* t, int n, int val);
|
||||
|
||||
// Colors in HTML form (not Plan 9 form): RGB packed into the low 3 bytes.
enum {
	White	= 0xFFFFFF,
	Black	= 0x000000,
	Blue	= 0x0000CC
};
|
||||
|
||||
// LEX

// HTML 4.0 tags (plus blink, nobr).
// Kept in lexical order; the values are used as array indices,
// so entries must not be reordered.
enum {
	Notfound, Comment,
	Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea,
	Tb, Tbase, Tbasefont, Tbdo, Tbig, Tblink, Tblockquote, Tbody, Tbq, Tbr, Tbutton,
	Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
	Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt,
	Tem,
	Tfieldset, Tfont, Tform, Tframe, Tframeset,
	Th1, Th2, Th3, Th4, Th5, Th6, Thead, Thr, Thtml,
	Ti, Tiframe, Timg, Tinput, Tins, Tisindex,
	Tkbd,
	Tlabel, Tlegend, Tli, Tlink,
	Tmap, Tmenu, Tmeta,
	Tnobr, Tnoframes, Tnoscript,
	Tobject, Tol, Toptgroup, Toption,
	Tp, Tparam, Tpre,
	Tq,
	Ts, Tsamp, Tscript, Tselect, Tsmall, Tspan, Tstrike, Tstrong, Tstyle, Tsub, Tsup,
	Ttable, Ttbody, Ttd, Ttextarea, Ttfoot, Tth, Tthead, Ttitle, Ttr, Ttt,
	Tu, Tul,
	Tvar,
	Numtags,		// count of the entries above
	// NOTE(review): RBRA looks like an offset added to a tag value (e.g. for
	// end tags) and Data like a value past that range -- confirm in lex.c.
	RBRA = Numtags,
	Data = Numtags+RBRA
};
|
||||
|
||||
// HTML 4.0 tag attributes.
// Must stay in lexical order; Numattrs is the number of attributes.
enum {
	Aabbr, Aaccept_charset, Aaccess_key, Aaction, Aalign, Aalink, Aalt, Aarchive, Aaxis,
	Abackground, Abgcolor, Aborder,
	Acellpadding, Acellspacing, Achar, Acharoff, Acharset, Achecked, Acite,
	Aclass, Aclassid, Aclear, Acode, Acodebase, Acodetype, Acolor,
	Acols, Acolspan, Acompact, Acontent, Acoords,
	Adata, Adatetime, Adeclare, Adefer, Adir, Adisabled,
	Aenctype,
	Aface, Afor, Aframe, Aframeborder,
	Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
	Aid, Aismap,
	Alabel, Alang, Alink, Alongdesc,
	Amarginheight, Amarginwidth, Amaxlength, Amedia, Amethod, Amultiple,
	Aname, Anohref, Anoresize, Anoshade, Anowrap,
	Aobject, Aonblur, Aonchange, Aonclick, Aondblclick,
	Aonfocus, Aonkeypress, Aonkeyup, Aonload,
	Aonmousedown, Aonmousemove, Aonmouseout, Aonmouseover, Aonmouseup,
	Aonreset, Aonselect, Aonsubmit, Aonunload,
	Aprofile, Aprompt,
	Areadonly, Arel, Arev, Arows, Arowspan, Arules,
	Ascheme, Ascope, Ascrolling, Aselected, Ashape,
	Asize, Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
	Atabindex, Atarget, Atext, Atitle, Atype,
	Ausemap,
	Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace,
	Awidth,
	Numattrs
};
|
||||
|
||||
struct Attr
|
||||
{
|
||||
Attr* next; // in list of attrs for a token
|
||||
int attid; // Aabbr, etc.
|
||||
Rune* value;
|
||||
};
|
||||
|
||||
struct Token
|
||||
{
|
||||
int tag; // Ta, etc
|
||||
Rune* text; // text in Data, attribute text in tag
|
||||
Attr* attr; // list of Attrs
|
||||
int starti; // index into source buffer of token start
|
||||
};
|
||||
|
||||
extern Rune** tagnames;
|
||||
extern Rune** attrnames;
|
||||
|
||||
extern void _freetokens(Token* tarray, int n);
|
||||
extern Token* _gettoks(uchar* data, int datalen, int chset, int mtype, int* plen);
|
||||
extern int _tokaval(Token* t, int attid, Rune** pans, int xfer);
|
||||
|
||||
#pragma varargck type "T" Token*
|
||||
|
||||
#include "runetab.h"
|
||||
1384
src/libhtml/lex.c
Normal file
1384
src/libhtml/lex.c
Normal file
File diff suppressed because it is too large
Load diff
22
src/libhtml/mkfile
Normal file
22
src/libhtml/mkfile
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
<$SYS9/$systype/$objtype/mkfile
|
||||
|
||||
# Archive for this library (presumably consumed by the mksyslib rules included at the end).
LIB=$LIB9/libhtml.a

# Object files; UPDATE below maps each one back to its .c source.
OFILES=\
	build.$O\
	lex.$O\
	strinttab.$O\
	utils.$O\
	runetab.$O\

# Headers for this library: the public html.h and the private impl.h.
HFILES=\
	$SYS9/sys/include/html.h\
	impl.h\

# This mkfile, the headers, the .c sources derived from OFILES, and the
# 386 variant of the library path.
UPDATE=\
	mkfile\
	$HFILES\
	${OFILES:%.$O=%.c}\
	${LIB:$SYS9/$systype/$objtype/%=$SYS9/$systype/386/%}\
|
||||
|
||||
<$SYS9/sys/src/cmd/mksyslib
|
||||
83
src/libhtml/runetab.c
Normal file
83
src/libhtml/runetab.c
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <draw.h>
|
||||
#include <html.h>
|
||||
#include "impl.h"
|
||||
|
||||
// Rune versions of the strings below; assigned by runetabinit().
Rune **runeconsttab;

// ASCII source text for runeconsttab.  The position of each entry must match
// the corresponding L... index constant in runetab.h (named on each line).
// NOTE(review): entries 0 (Ltab2space) and 1 (Lspace) are the same literal
// here -- confirm that entry 0 has not lost whitespace somewhere.
char *_runeconsttab[] = {
	" ",					// Ltab2space
	" ",					// Lspace
	"",					// Lempty
	"#",					// Lhash
	"+",					// Lplus
	", ",					// Lcommaspace
	"-",					// Lminus
	"-->",					// Larrow
	"1",					// Lone
	"<",					// Llt
	">",					// Lgt
	"?",					// Lquestion
	"Index search terms:",			// Lindex
	"Reset",				// Lreset
	"Submit",				// Lsubmit
	"^0-9",					// Lnot0to9
	"_ISINDEX_",				// Lisindex
	"_blank",				// L_blank
	"_fr",					// Lfr
	"_no_name_submit_",			// Lnoname
	"_parent",				// L_parent
	"_self",				// L_self
	"_top",					// L_top
	"application/x-www-form-urlencoded",	// Lappl_form
	"circle",				// Lcircle
	"cm",					// Lcm
	"content-script-type",			// Lcontent
	"disc",					// Ldisc
	"em",					// Lem
	"in",					// Lin
	"javascript",				// Ljavascript
	"jscript",				// Ljscript
	"jscript1.1",				// Ljscript1
	"mm",					// Lmm
	"none",					// Lnone
	"pi",					// Lpi
	"pt",					// Lpt
	"refresh",				// Lrefresh
	"select",				// Lselect
	"square",				// Lsquare
	"textarea"				// Ltextarea
};
|
||||
|
||||
Rune**
|
||||
cvtstringtab(char **tab, int n)
|
||||
{
|
||||
int i;
|
||||
Rune **rtab;
|
||||
|
||||
rtab = emalloc(n*sizeof(rtab[0]));
|
||||
for(i=0; i<n; i++)
|
||||
rtab[i] = toStr(tab[i], strlen(tab[i]), US_Ascii);
|
||||
return rtab;
|
||||
}
|
||||
|
||||
StringInt*
|
||||
cvtstringinttab(AsciiInt *tab, int n)
|
||||
{
|
||||
int i;
|
||||
StringInt *stab;
|
||||
|
||||
stab = emalloc(n*sizeof(stab[0]));
|
||||
for(i=0; i<n; i++){
|
||||
stab[i].key = toStr(tab[i].key, strlen(tab[i].key), US_Ascii);
|
||||
stab[i].val = tab[i].val;
|
||||
}
|
||||
return stab;
|
||||
}
|
||||
|
||||
void
|
||||
runetabinit(void)
|
||||
{
|
||||
runeconsttab = cvtstringtab(_runeconsttab, nelem(_runeconsttab));
|
||||
return;
|
||||
}
|
||||
59
src/libhtml/runetab.h
Normal file
59
src/libhtml/runetab.h
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
typedef struct AsciiInt AsciiInt;

// Char-string counterpart of StringInt: a C string key with its int value.
struct AsciiInt {
	char *key;
	int val;
};
|
||||
|
||||
// Indices into runeconsttab (see the L() macro).  The order must match
// the order of the strings in _runeconsttab in runetab.c.
enum {
	Ltab2space, Lspace, Lempty, Lhash, Lplus, Lcommaspace, Lminus, Larrow,
	Lone, Llt, Lgt, Lquestion,
	Lindex, Lreset, Lsubmit,
	Lnot0to9, Lisindex,
	L_blank, Lfr, Lnoname, L_parent, L_self, L_top,
	Lappl_form,
	Lcircle, Lcm, Lcontent,
	Ldisc,
	Lem,
	Lin,
	Ljavascript, Ljscript, Ljscript1,
	Lmm,
	Lnone,
	Lpi, Lpt,
	Lrefresh,
	Lselect, Lsquare,
	Ltextarea
};
|
||||
|
||||
#define L(x) runeconsttab[(x)]
|
||||
|
||||
extern Rune **runeconsttab;
|
||||
|
||||
/* XXX: for unix port only */
|
||||
Rune **cvtstringtab(char**, int);
|
||||
StringInt *cvtstringinttab(AsciiInt*, int);
|
||||
void runetabinit(void);
|
||||
64
src/libhtml/strinttab.c
Normal file
64
src/libhtml/strinttab.c
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <draw.h>
|
||||
#include <html.h>
|
||||
#include "impl.h"
|
||||
|
||||
// Do case-insensitive lookup of key[0:keylen] in t[0:n] (key part),
|
||||
// returning 1 if found, 0 if not.
|
||||
// Array t must be sorted in increasing lexicographic order of key.
|
||||
// If found, return corresponding val in *pans.
|
||||
int
|
||||
_lookup(StringInt* t, int n, Rune* key, int keylen, int* pans)
|
||||
{
|
||||
int min;
|
||||
int max;
|
||||
int try;
|
||||
int cmpresult;
|
||||
|
||||
min = 0;
|
||||
max = n - 1;
|
||||
while(min <= max) {
|
||||
try = (min + max)/2;
|
||||
cmpresult = _Strncmpci(key, keylen, t[try].key);
|
||||
if(cmpresult > 0)
|
||||
min = try + 1;
|
||||
else if(cmpresult < 0)
|
||||
max = try - 1;
|
||||
else {
|
||||
*pans = t[try].val;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return first key in t[0:n] that corresponds to val,
|
||||
// nil if none.
|
||||
Rune*
|
||||
_revlookup(StringInt* t, int n, int val)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < n; i++)
|
||||
if(t[i].val == val)
|
||||
return t[i].key;
|
||||
return nil;
|
||||
}
|
||||
|
||||
// Make a StringInt table out of a[0:n], mapping each string
|
||||
// to its index. Check that entries are in alphabetical order.
|
||||
StringInt*
|
||||
_makestrinttab(Rune** a, int n)
|
||||
{
|
||||
StringInt* ans;
|
||||
int i;
|
||||
|
||||
ans = (StringInt*)emalloc(n * sizeof(StringInt));
|
||||
for(i = 0; i < n; i++) {
|
||||
ans[i].key = a[i];
|
||||
ans[i].val = i;
|
||||
assert(i == 0 || runestrcmp(a[i], a[i - 1]) >= 0);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
591
src/libhtml/utils.c
Normal file
591
src/libhtml/utils.c
Normal file
|
|
@ -0,0 +1,591 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <draw.h>
|
||||
#include <html.h>
|
||||
#include "impl.h"
|
||||
|
||||
// Character classes in the form _inclass takes: whitespace is space, tab,
// newline, carriage return; notwhitespace is the same set negated by a
// leading '^'.  Both are 0-terminated.
Rune whitespace[] = { ' ', '\t', '\n', '\r', 0 };
Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r', 0 };
|
||||
|
||||
// All lists start out like List structure.
|
||||
// List itself can be used as list of int.
|
||||
int
|
||||
_listlen(List* l)
|
||||
{
|
||||
int n = 0;
|
||||
|
||||
while(l != nil) {
|
||||
l = l->next;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
// Cons
|
||||
List*
|
||||
_newlist(int val, List* rest)
|
||||
{
|
||||
List* ans;
|
||||
|
||||
ans = (List*)emalloc(sizeof(List));
|
||||
ans->val = val;
|
||||
ans->next = rest;
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Reverse a list in place
|
||||
List*
|
||||
_revlist(List* l)
|
||||
{
|
||||
List* newl;
|
||||
List* nextl;
|
||||
|
||||
newl = nil;
|
||||
while(l != nil) {
|
||||
nextl = l->next;
|
||||
l->next = newl;
|
||||
newl = l;
|
||||
l = nextl;
|
||||
}
|
||||
return newl;
|
||||
}
|
||||
|
||||
// The next few routines take a "character class" as argument.
|
||||
// e.g., "a-zA-Z", or "^ \t\n"
|
||||
// (ranges indicated by - except in first position;
|
||||
// ^ in first position means "not in" the following class)
|
||||
|
||||
// Splitl splits s[0:n] just before first character of class cl.
|
||||
// Answers go in (p1, n1) and (p2, n2).
|
||||
// If no split, the whole thing goes in the first component.
|
||||
// Note: answers contain pointers into original string.
|
||||
void
|
||||
_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
p = _Strnclass(s, cl, n);
|
||||
*p1 = s;
|
||||
if(p == nil) {
|
||||
*n1 = n;
|
||||
*p2 = nil;
|
||||
*n2 = 0;
|
||||
}
|
||||
else {
|
||||
*p2 = p;
|
||||
*n1 = p-s;
|
||||
*n2 = n-*n1;
|
||||
}
|
||||
}
|
||||
|
||||
// Splitr splits s[0:n] just after last character of class cl.
|
||||
// Answers go in (p1, n1) and (p2, n2).
|
||||
// If no split, the whole thing goes in the last component.
|
||||
// Note: answers contain pointers into original string.
|
||||
void
|
||||
_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
p = _Strnrclass(s, cl, n);
|
||||
if(p == nil) {
|
||||
*p1 = nil;
|
||||
*n1 = 0;
|
||||
*p2 = s;
|
||||
*n2 = n;
|
||||
}
|
||||
else {
|
||||
*p1 = s;
|
||||
*p2 = p+1;
|
||||
*n1 = *p2-s;
|
||||
*n2 = n-*n1;
|
||||
}
|
||||
}
|
||||
|
||||
// Splitall splits s[0:n] into parts that are separated by characters from class cl.
|
||||
// Each part will have nonzero length.
|
||||
// At most alen parts are found, and pointers to their starts go into
|
||||
// the strarr array, while their lengths go into the lenarr array.
|
||||
// The return value is the number of parts found.
|
||||
int
|
||||
_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
|
||||
{
|
||||
int i;
|
||||
Rune* p;
|
||||
Rune* q;
|
||||
Rune* slast;
|
||||
|
||||
if(s == nil || n == 0)
|
||||
return 0;
|
||||
i = 0;
|
||||
p = s;
|
||||
slast = s+n;
|
||||
while(p < slast && i < alen) {
|
||||
while(p < slast && _inclass(*p, cl))
|
||||
p++;
|
||||
if(p == slast)
|
||||
break;
|
||||
q = _Strnclass(p, cl, slast-p);
|
||||
if(q == nil)
|
||||
q = slast;
|
||||
assert(q > p && q <= slast);
|
||||
strarr[i] = p;
|
||||
lenarr[i] = q-p;
|
||||
i++;
|
||||
p = q;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Find part of s that excludes leading and trailing whitespace,
|
||||
// and return that part in *pans (and its length in *panslen).
|
||||
void
|
||||
_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
|
||||
{
|
||||
Rune* p;
|
||||
Rune* q;
|
||||
|
||||
p = nil;
|
||||
if(n > 0) {
|
||||
p = _Strnclass(s, notwhitespace, n);
|
||||
if(p != nil) {
|
||||
q = _Strnrclass(s, notwhitespace, n);
|
||||
assert(q != nil);
|
||||
n = q+1-p;
|
||||
}
|
||||
}
|
||||
*pans = p;
|
||||
*panslen = n;
|
||||
}
|
||||
|
||||
// _Strclass returns a pointer to the first element of s that is
|
||||
// a member of class cl, nil if none.
|
||||
Rune*
|
||||
_Strclass(Rune* s, Rune* cl)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
for(p = s; *p != 0; p++)
|
||||
if(_inclass(*p, cl))
|
||||
return p;
|
||||
return nil;
|
||||
}
|
||||
|
||||
// _Strnclass returns a pointer to the first element of s[0:n] that is
|
||||
// a member of class cl, nil if none.
|
||||
Rune*
|
||||
_Strnclass(Rune* s, Rune* cl, int n)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
for(p = s; n-- && *p != 0; p++)
|
||||
if(_inclass(*p, cl))
|
||||
return p;
|
||||
return nil;
|
||||
}
|
||||
|
||||
// _Strrclass returns a pointer to the last element of s that is
|
||||
// a member of class cl, nil if none
|
||||
Rune*
|
||||
_Strrclass(Rune* s, Rune* cl)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
if(s == nil || *s == 0)
|
||||
return nil;
|
||||
p = s + runestrlen(s) - 1;
|
||||
while(p >= s) {
|
||||
if(_inclass(*p, cl))
|
||||
return p;
|
||||
p--;
|
||||
};
|
||||
return nil;
|
||||
}
|
||||
|
||||
// _Strnrclass returns a pointer to the last element of s[0:n] that is
|
||||
// a member of class cl, nil if none
|
||||
Rune*
|
||||
_Strnrclass(Rune* s, Rune* cl, int n)
|
||||
{
|
||||
Rune* p;
|
||||
|
||||
if(s == nil || *s == 0 || n == 0)
|
||||
return nil;
|
||||
p = s + n - 1;
|
||||
while(p >= s) {
|
||||
if(_inclass(*p, cl))
|
||||
return p;
|
||||
p--;
|
||||
};
|
||||
return nil;
|
||||
}
|
||||
|
||||
// Is c in the class cl?
|
||||
int
|
||||
_inclass(Rune c, Rune* cl)
|
||||
{
|
||||
int n;
|
||||
int ans;
|
||||
int negate;
|
||||
int i;
|
||||
|
||||
n = _Strlen(cl);
|
||||
if(n == 0)
|
||||
return 0;
|
||||
ans = 0;
|
||||
negate = 0;
|
||||
if(cl[0] == '^') {
|
||||
negate = 1;
|
||||
cl++;
|
||||
n--;
|
||||
}
|
||||
for(i = 0; i < n; i++) {
|
||||
if(cl[i] == '-' && i > 0 && i < n - 1) {
|
||||
if(c >= cl[i - 1] && c <= cl[i + 1]) {
|
||||
ans = 1;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
else if(c == cl[i]) {
|
||||
ans = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(negate)
|
||||
ans = !ans;
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Is pre a prefix of s?
|
||||
int
|
||||
_prefix(Rune* pre, Rune* s)
|
||||
{
|
||||
int ns;
|
||||
int n;
|
||||
int k;
|
||||
|
||||
ns = _Strlen(s);
|
||||
n = _Strlen(pre);
|
||||
if(ns < n)
|
||||
return 0;
|
||||
for(k = 0; k < n; k++) {
|
||||
if(pre[k] != s[k])
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Number of runes in (null-terminated) s
|
||||
int
|
||||
_Strlen(Rune* s)
|
||||
{
|
||||
if(s == nil)
|
||||
return 0;
|
||||
return runestrlen(s);
|
||||
}
|
||||
|
||||
// -1, 0, 1 as s1 is lexicographically less, equal greater than s2
|
||||
int
|
||||
_Strcmp(Rune *s1, Rune *s2)
|
||||
{
|
||||
if(s1 == nil)
|
||||
return (s2 == nil || *s2 == 0) ? 0 : -1;
|
||||
if(s2 == nil)
|
||||
return (*s1 == 0) ? 0 : 1;
|
||||
return runestrcmp(s1, s2);
|
||||
}
|
||||
|
||||
// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
|
||||
// Also, do a case-insensitive match, assuming s2
|
||||
// has no chars in [A-Z], only their lowercase versions.
|
||||
// (This routine is used for in-place keyword lookup, where s2 is in a keyword
|
||||
// list and s1 is some substring, possibly mixed-case, in a buffer.)
|
||||
int
|
||||
_Strncmpci(Rune *s1, int n1, Rune *s2)
|
||||
{
|
||||
Rune c1, c2;
|
||||
|
||||
for(;;) {
|
||||
if(n1-- == 0) {
|
||||
if(*s2 == 0)
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
c1 = *s1++;
|
||||
c2 = *s2++;
|
||||
if(c1 >= 'A' && c1 <= 'Z')
|
||||
c1 = c1 - 'A' + 'a';
|
||||
if(c1 != c2) {
|
||||
if(c1 > c2)
|
||||
return 1;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// emalloc and copy
|
||||
Rune*
|
||||
_Strdup(Rune* s)
|
||||
{
|
||||
if(s == nil)
|
||||
return nil;
|
||||
return _Strndup(s, runestrlen(s));
|
||||
}
|
||||
|
||||
// emalloc and copy n chars of s (assume s is at least that long),
|
||||
// and add 0 terminator.
|
||||
// Return nil if n==0.
|
||||
Rune*
|
||||
_Strndup(Rune* s, int n)
|
||||
{
|
||||
Rune* ans;
|
||||
|
||||
if(n <= 0)
|
||||
return nil;
|
||||
ans = _newstr(n);
|
||||
memmove(ans, s, n*sizeof(Rune));
|
||||
ans[n] = 0;
|
||||
return ans;
|
||||
}
|
||||
// emalloc enough room for n Runes, plus 1 null terminator.
|
||||
// (Not initialized to anything.)
|
||||
Rune*
|
||||
_newstr(int n)
|
||||
{
|
||||
return (Rune*)emalloc((n+1)*sizeof(Rune));
|
||||
}
|
||||
|
||||
// emalloc and copy s+t
|
||||
Rune*
|
||||
_Strdup2(Rune* s, Rune* t)
|
||||
{
|
||||
int ns, nt;
|
||||
Rune* ans;
|
||||
Rune* p;
|
||||
|
||||
ns = _Strlen(s);
|
||||
nt = _Strlen(t);
|
||||
if(ns+nt == 0)
|
||||
return nil;
|
||||
ans = _newstr(ns+nt);
|
||||
p = _Stradd(ans, s, ns);
|
||||
p = _Stradd(p, t, nt);
|
||||
*p = 0;
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Return emalloc'd substring s[start:stop],
|
||||
Rune*
|
||||
_Strsubstr(Rune* s, int start, int stop)
|
||||
{
|
||||
Rune* t;
|
||||
|
||||
if(start == stop)
|
||||
return nil;
|
||||
t = _Strndup(s+start, stop-start);
|
||||
return t;
|
||||
}
|
||||
|
||||
// Copy n chars to s1 from s2, and return s1+n
|
||||
Rune*
|
||||
_Stradd(Rune* s1, Rune* s2, int n)
|
||||
{
|
||||
if(n == 0)
|
||||
return s1;
|
||||
memmove(s1, s2, n*sizeof(Rune));
|
||||
return s1+n;
|
||||
}
|
||||
|
||||
// Like strtol, but converting from Rune* string
|
||||
|
||||
//#define LONG_MAX 2147483647L
|
||||
//#define LONG_MIN -2147483648L
|
||||
|
||||
long
|
||||
_Strtol(Rune* nptr, Rune** endptr, int base)
|
||||
{
|
||||
Rune* p;
|
||||
long n, nn;
|
||||
int c, ovfl, v, neg, ndig;
|
||||
|
||||
p = nptr;
|
||||
neg = 0;
|
||||
n = 0;
|
||||
ndig = 0;
|
||||
ovfl = 0;
|
||||
|
||||
/*
|
||||
* White space
|
||||
*/
|
||||
for(;;p++){
|
||||
switch(*p){
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case '\v':
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sign
|
||||
*/
|
||||
if(*p=='-' || *p=='+')
|
||||
if(*p++ == '-')
|
||||
neg = 1;
|
||||
|
||||
/*
|
||||
* Base
|
||||
*/
|
||||
if(base==0){
|
||||
if(*p != '0')
|
||||
base = 10;
|
||||
else{
|
||||
base = 8;
|
||||
if(p[1]=='x' || p[1]=='X'){
|
||||
p += 2;
|
||||
base = 16;
|
||||
}
|
||||
}
|
||||
}else if(base==16 && *p=='0'){
|
||||
if(p[1]=='x' || p[1]=='X')
|
||||
p += 2;
|
||||
}else if(base<0 || 36<base)
|
||||
goto Return;
|
||||
|
||||
/*
|
||||
* Non-empty sequence of digits
|
||||
*/
|
||||
for(;; p++,ndig++){
|
||||
c = *p;
|
||||
v = base;
|
||||
if('0'<=c && c<='9')
|
||||
v = c - '0';
|
||||
else if('a'<=c && c<='z')
|
||||
v = c - 'a' + 10;
|
||||
else if('A'<=c && c<='Z')
|
||||
v = c - 'A' + 10;
|
||||
if(v >= base)
|
||||
break;
|
||||
nn = n*base + v;
|
||||
if(nn < n)
|
||||
ovfl = 1;
|
||||
n = nn;
|
||||
}
|
||||
|
||||
Return:
|
||||
if(ndig == 0)
|
||||
p = nptr;
|
||||
if(endptr)
|
||||
*endptr = p;
|
||||
if(ovfl){
|
||||
if(neg)
|
||||
return LONG_MIN;
|
||||
return LONG_MAX;
|
||||
}
|
||||
if(neg)
|
||||
return -n;
|
||||
return n;
|
||||
}
|
||||
|
||||
// Convert buf[0:n], bytes whose character set is chset,
|
||||
// into a emalloc'd null-terminated Unicode string.
|
||||
Rune*
|
||||
toStr(uchar* buf, int n, int chset)
|
||||
{
|
||||
int i;
|
||||
int m;
|
||||
Rune ch;
|
||||
Rune* ans;
|
||||
|
||||
switch(chset) {
|
||||
case US_Ascii:
|
||||
case ISO_8859_1:
|
||||
ans = (Rune*)emalloc((n+1)*sizeof(Rune));
|
||||
for(i = 0; i < n; i++)
|
||||
ans[i] = buf[i];
|
||||
ans[n] = 0;
|
||||
break;
|
||||
|
||||
case UTF_8:
|
||||
m = 0;
|
||||
for(i = 0; i < n; ) {
|
||||
i += chartorune(&ch, (char*)(buf+i));
|
||||
m++;
|
||||
}
|
||||
ans = (Rune*)emalloc((m+1)*sizeof(Rune));
|
||||
m = 0;
|
||||
for(i = 0; i < n; ) {
|
||||
i += chartorune(&ch, (char*)(buf+i));
|
||||
ans[m++] = ch;
|
||||
}
|
||||
ans[m] = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
ans = nil;
|
||||
assert(0);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Convert buf[0:n], Unicode characters,
|
||||
// into an emalloc'd null-terminated string in character set chset.
|
||||
// Use 0x80 for unconvertable characters.
|
||||
uchar*
|
||||
fromStr(Rune* buf, int n, int chset)
|
||||
{
|
||||
uchar* ans;
|
||||
int i, lim, m;
|
||||
Rune ch;
|
||||
uchar* p;
|
||||
uchar s[UTFmax];
|
||||
|
||||
ans = nil;
|
||||
switch(chset) {
|
||||
case US_Ascii:
|
||||
case ISO_8859_1:
|
||||
ans = (uchar*)emalloc(n+1);
|
||||
lim = (chset==US_Ascii)? 127 : 255;
|
||||
for(i = 0; i < n; i++) {
|
||||
ch = buf[i];
|
||||
if(ch > lim)
|
||||
ch = 0x80;
|
||||
ans[i] = ch;
|
||||
}
|
||||
ans[n] = 0;
|
||||
break;
|
||||
|
||||
case UTF_8:
|
||||
m = 0;
|
||||
for(i = 0; i < n; i++) {
|
||||
m += runetochar((char*)s, &buf[i]);
|
||||
}
|
||||
ans = (uchar*)emalloc(m+1);
|
||||
p = ans;
|
||||
for(i = 0; i < n; i++)
|
||||
p += runetochar((char*)p, &buf[i]);
|
||||
*p = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
return ans;
|
||||
|
||||
}
|
||||
|
||||
// Convert n to emalloc'd String.
|
||||
Rune*
|
||||
_ltoStr(int n)
|
||||
{
|
||||
int m;
|
||||
uchar buf[20];
|
||||
|
||||
m = snprint((char*)buf, sizeof(buf), "%d", n);
|
||||
return toStr(buf, m, US_Ascii);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue