Many small edits.

This commit is contained in:
rsc 2005-01-13 04:49:19 +00:00
parent 741f510ce7
commit c8b6342d3c
160 changed files with 2204 additions and 864 deletions

View file

@ -134,6 +134,9 @@ void movieprintkey(void);
long pgwnextoff(long);
void pgwprintentry(Entry,int);
void pgwprintkey(void);
/* Roget's Thesaurus handlers, defined in roget.c */
void rogetprintentry(Entry, int);
long rogetnextoff(long);
void rogetprintkey(void);
long slangnextoff(long);
void slangprintentry(Entry, int);
void slangprintkey(void);

View file

@ -2,7 +2,7 @@
TARG=dict
LFILES=oed.$O ahd.$O pcollins.$O pcollinsg.$O movie.$O slang.$O robert.$O\
world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O
world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O roget.$O\
OFILES=dict.$O\
$LFILES\

147
src/cmd/dict/roget.c Normal file
View file

@ -0,0 +1,147 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "dict.h"
/* Roget's Thesaurus from Project Gutenberg */
/* static long Last = 0; */
/*
 * Write the entry delimited by e.start and e.end.
 *
 * cmd == 'h': write only the words that precede the first " -- "
 * (presumably the heading), dropping the leading token, bracketed
 * or braced notes, and tokens that begin with a digit or punctuation.
 * Successive words are separated by a single space.
 *
 * any other cmd: write the whole entry after its leading token,
 * reflowing it: runs of white space collapse to one space, spaces
 * before ';', '.' and ',' are dropped, "&c (word) nnn" references
 * become "/word/", bare "&c nnn" references are removed, and a
 * source line that does not start with white space begins a new
 * output line.
 *
 * Every read through p is bounded by e.end; the text is not assumed
 * to be NUL-terminated.  Characters are cast to unsigned char before
 * being handed to the <ctype.h> classifiers because the text may
 * contain bytes >= 0x80.
 */
void
rogetprintentry(Entry e, int cmd)
{
	int spc;
	char c, *p;

	spc = 0;
	p = e.start;

	if(cmd == 'h'){
		/* skip the leading token */
		while(p < e.end && !isspace((unsigned char)*p))
			p++;

		while(p < e.end && !(e.end - p >= 4 && strncmp(p, " -- ", 4) == 0)){
			while(p < e.end && isspace((unsigned char)*p))
				p++;
			if(p >= e.end)
				break;

			if(*p == '[' || *p == '{'){	/* skip bracketed note */
				c = (*p == '[')? ']': '}';
				while(p < e.end && *p != c)
					p++;
				if(p < e.end)
					p++;
				continue;
			}
			if(isdigit((unsigned char)*p) || ispunct((unsigned char)*p)){
				while(p < e.end && !isspace((unsigned char)*p))
					p++;
				continue;
			}

			/* separate this word from the one written before it */
			if(spc)
				outchar(' ');
			while(p < e.end && !isspace((unsigned char)*p))
				outchar(*p++);
			spc = 1;
		}
		return;
	}

	/* skip the leading token and the white space after it */
	while(p < e.end && !isspace((unsigned char)*p))
		p++;
	while(p < e.end && isspace((unsigned char)*p))
		p++;

	while(p < e.end){
		if(e.end - p > 4 && strncmp(p, " -- ", 4) == 0){	/* first line */
			outnl(2);
			p += 4;
			spc = 0;
		}

		if(e.end - p > 2 && strncmp(p, "[ ", 2) == 0){	/* twiddle layout */
			/* move the blank from after the bracket to before it */
			outchars(" [");
			p += 2;
			spc = 0;
			continue;
		}

		if(e.end - p > 4 && strncmp(p, "&c (", 4) == 0){	/* useful xref */
			if(spc)
				outchar(' ');
			outchar('/');
			while(p < e.end && *p != '(')
				p++;
			p++;
			while(p < e.end && *p != ')')
				outchar(*p++);
			if(p < e.end)
				p++;
			/* drop the entry number that follows the word */
			while(p < e.end && isspace((unsigned char)*p))
				p++;
			while(p < e.end && isdigit((unsigned char)*p))
				p++;
			outchar('/');
			continue;
		}

		if(e.end - p > 3 && strncmp(p, "&c ", 3) == 0){	/* less useful xref */
			while(p < e.end && !isdigit((unsigned char)*p))
				p++;
			while(p < e.end && isdigit((unsigned char)*p))
				p++;
			continue;
		}

		if(*p == '\n' && e.end - p > 1){	/* their newlines */
			spc = 0;
			p++;
			if(isspace((unsigned char)*p)){	/* their continuation line */
				while(p < e.end && isspace((unsigned char)*p))
					p++;
				/* back up onto the last blank so it is seen as a space below */
				p--;
			}else
				outnl(2);
		}

		if(spc && *p != ';' && *p != '.' &&
		   *p != ',' && !isspace((unsigned char)*p)){	/* drop spaces before punct */
			spc = 0;
			outchar(' ');
		}

		if(isspace((unsigned char)*p))
			spc = 1;
		else
			outchar(*p);
		p++;
	}
	outnl(0);
}
/*
 * Return the offset in bdict of the next entry after fromoff.
 *
 * The remainder of the line at fromoff is discarded; then each
 * following line is examined, and the first one that begins with
 * a digit and contains " -- " is taken as the start of an entry.
 * Its starting offset is returned.  If no such line is found, the
 * offset at which reading stopped is returned.
 */
long
rogetnextoff(long fromoff)
{
	int i;
	vlong l;
	char *p;

	Bseek(bdict, fromoff, 0);
	Brdline(bdict, '\n');	/* discard the rest of the current line */

	while((p = Brdline(bdict, '\n')) != nil){
		l = Blinelen(bdict);
		/* cast: the text may hold bytes >= 0x80, which are negative as plain char */
		if(!isdigit((unsigned char)*p))
			continue;
		for(i = 0; i < l-4; i++){
			if(strncmp(p+i, " -- ", 4) == 0)
				return Boffset(bdict)-l;	/* offset of the start of this line */
		}
	}
	return Boffset(bdict);
}
/*
 * There is no pronunciation key for this dictionary;
 * write a note saying so to bout.
 */
void
rogetprintkey(void)
{
	static char nokey[] = "No pronunciation key.\n";

	Bprint(bout, "%s", nokey);
}

View file

@ -16,6 +16,9 @@ Dict dicts[] = {
{"thesaurus", "Collins Thesaurus",
"#9/dict/thesaurus", "#9/dict/thesindex",
thesnextoff, thesprintentry, thesprintkey},
{"roget", "Project Gutenberg Roget's Thesaurus",
"#9/dict/roget", "#9/dict/rogetindex",
rogetnextoff, rogetprintentry, rogetprintkey},
{"ce", "Gendai Chinese->English",
"#9/dict/world/sansdata/sandic24.dat",