2003-11-25 03:37:45 +00:00
|
|
|
|
#include <u.h>
|
|
|
|
|
|
#include <libc.h>
|
|
|
|
|
|
#include <bio.h>
|
|
|
|
|
|
#include "dict.h"
|
|
|
|
|
|
|
|
|
|
|
|
Dict dicts[] = {
|
|
|
|
|
|
{"oed", "Oxford English Dictionary, 2nd Ed.",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/oed2", "#9/dict/oed2index",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
oednextoff, oedprintentry, oedprintkey},
|
|
|
|
|
|
{"ahd", "American Heritage Dictionary, 2nd College Ed.",
|
|
|
|
|
|
"ahd/DICT.DB", "ahd/index",
|
|
|
|
|
|
ahdnextoff, ahdprintentry, ahdprintkey},
|
|
|
|
|
|
{"pgw", "Project Gutenberg Webster Dictionary",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/pgw", "#9/dict/pgwindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pgwnextoff, pgwprintentry, pgwprintkey},
|
|
|
|
|
|
{"thesaurus", "Collins Thesaurus",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/thesaurus", "#9/dict/thesindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
thesnextoff, thesprintentry, thesprintkey},
|
2005-01-13 04:49:19 +00:00
|
|
|
|
{"roget", "Project Gutenberg Roget's Thesaurus",
|
|
|
|
|
|
"#9/dict/roget", "#9/dict/rogetindex",
|
|
|
|
|
|
rogetnextoff, rogetprintentry, rogetprintkey},
|
2003-11-25 03:37:45 +00:00
|
|
|
|
|
|
|
|
|
|
{"ce", "Gendai Chinese->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic24.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/ceindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"ceh", "Gendai Chinese->English (Hanzi index)",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic24.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/cehindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"ec", "Gendai English->Chinese",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic24.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/ecindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"dae", "Gyldendal Danish->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/gylddata/sandic30.dat",
|
|
|
|
|
|
"#9/dict/world/gylddata/daeindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"eda", "Gyldendal English->Danish",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/gylddata/sandic29.dat",
|
|
|
|
|
|
"#9/dict/world/gylddata/edaindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"due", "Wolters-Noordhoff Dutch->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/woltdata/sandic07.dat",
|
|
|
|
|
|
"#9/dict/world/woltdata/deindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"edu", "Wolters-Noordhoff English->Dutch",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/woltdata/sandic06.dat",
|
|
|
|
|
|
"#9/dict/world/woltdata/edindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"fie", "WSOY Finnish->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/werndata/sandic32.dat",
|
|
|
|
|
|
"#9/dict/world/werndata/fieindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"efi", "WSOY English->Finnish",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/werndata/sandic31.dat",
|
|
|
|
|
|
"#9/dict/world/werndata/efiindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"fe", "Collins French->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/fe", "#9/dict/feindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
{"ef", "Collins English->French",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/ef", "#9/dict/efindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"ge", "Collins German->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/ge", "#9/dict/geindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollgnextoff, pcollgprintentry, pcollgprintkey},
|
|
|
|
|
|
{"eg", "Collins English->German",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/eg", "#9/dict/egindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollgnextoff, pcollgprintentry, pcollgprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"ie", "Collins Italian->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/ie", "#9/dict/ieindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
{"ei", "Collins English->Italian",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/ei", "#9/dict/eiindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"je", "Sanshusha Japanese->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic18.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/jeindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"jek", "Sanshusha Japanese->English (Kanji index)",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic18.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/jekindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"ej", "Sanshusha English->Japanese",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic18.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/ejindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"tjeg", "Sanshusha technical Japanese->English,German",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic16.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/tjegindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic16.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/tjegkindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"tegj", "Sanshusha technical English->German,Japanese",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic16.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/tegjindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"tgje", "Sanshusha technical German->Japanese,English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/sansdata/sandic16.dat",
|
|
|
|
|
|
"#9/dict/world/sansdata/tgjeindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"ne", "Kunnskapforlaget Norwegian->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/kunndata/sandic28.dat",
|
|
|
|
|
|
"#9/dict/world/kunndata/neindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"en", "Kunnskapforlaget English->Norwegian",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/kunndata/sandic27.dat",
|
|
|
|
|
|
"#9/dict/world/kunndata/enindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"re", "Leon Ungier Russian->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/re", "#9/dict/reindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
simplenextoff, simpleprintentry, simpleprintkey},
|
|
|
|
|
|
{"er", "Leon Ungier English->Russian",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/re", "#9/dict/erindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
simplenextoff, simpleprintentry, simpleprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"se", "Collins Spanish->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/se", "#9/dict/seindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
{"es", "Collins English->Spanish",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/es", "#9/dict/esindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
pcollnextoff, pcollprintentry, pcollprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"swe", "Esselte Studium Swedish->English",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/essedata/sandic34.dat",
|
|
|
|
|
|
"#9/dict/world/essedata/sweindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
{"esw", "Esselte Studium English->Swedish",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/world/essedata/sandic33.dat",
|
|
|
|
|
|
"#9/dict/world/essedata/eswindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
worldnextoff, worldprintentry, worldprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"movie", "Movies -- by title",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"movie/data", "#9/dict/movtindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
movienextoff, movieprintentry, movieprintkey},
|
|
|
|
|
|
{"moviea", "Movies -- by actor",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"movie/data", "#9/dict/movaindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
movienextoff, movieprintentry, movieprintkey},
|
|
|
|
|
|
{"movied", "Movies -- by director",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"movie/data", "#9/dict/movdindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
movienextoff, movieprintentry, movieprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"slang", "English Slang",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/slang", "#9/dict/slangindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
slangnextoff, slangprintentry, slangprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{"robert", "Robert Électronique",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/robert/_pointers", "#9/dict/robert/_index",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
robertnextoff, robertindexentry, robertprintkey},
|
|
|
|
|
|
{"robertv", "Robert Électronique - formes des verbes",
|
2003-12-11 17:48:38 +00:00
|
|
|
|
"#9/dict/robert/flex.rob", "#9/dict/robert/_flexindex",
|
2003-11-25 03:37:45 +00:00
|
|
|
|
robertnextflex, robertflexentry, robertprintkey},
|
|
|
|
|
|
|
|
|
|
|
|
{0, 0, 0, 0, 0}
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct Lig Lig;
|
|
|
|
|
|
struct Lig {
|
|
|
|
|
|
Rune start; /* accent rune */
|
|
|
|
|
|
Rune pairs[100]; /* <char,accented version> pairs */
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2004-03-26 01:51:33 +00:00
|
|
|
|
/* keep in sync with dict.h */
|
2003-11-25 03:37:45 +00:00
|
|
|
|
static Lig ligtab[Nligs] = {
|
2004-03-26 01:51:33 +00:00
|
|
|
|
{0xb4, {0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107, 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd, 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a, 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3, 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b, 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd, 0x5a, 0x179, 0x7a, 0x17a, 0}},
|
|
|
|
|
|
{0x2cb, {0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8, 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2, 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
|
|
|
|
|
|
{0xa8, {0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb, 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6, 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
|
|
|
|
|
|
{0xb8, {0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136, 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145, 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e, 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
|
|
|
|
|
|
{0x2dc, {0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129, 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5, 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
|
|
|
|
|
|
{0x2d8, {0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115, 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d, 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c, 0x75, 0x16d, 0}},
|
|
|
|
|
|
{0x2da, {0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
|
|
|
|
|
|
{0x2d9, {0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117, 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f, 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
|
|
|
|
|
|
{0x2e, {0}},
|
|
|
|
|
|
{0x2322, {0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109, 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d, 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee, 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4, 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb, 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176, 0x79, 0x177, 0}},
|
|
|
|
|
|
{0x32f, {0}},
|
|
|
|
|
|
{0x2db, {0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119, 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172, 0x75, 0x173, 0}},
|
|
|
|
|
|
{0xaf, {0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113, 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c, 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
|
|
|
|
|
|
{0x2c7, {0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f, 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e, 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159, 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165, 0x5a, 0x17d, 0x7a, 0x17e, 0}},
|
|
|
|
|
|
{0x2bd, {0}},
|
|
|
|
|
|
{0x2bc, {0}},
|
|
|
|
|
|
{0x32e, {0}}
|
2003-11-25 03:37:45 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Multi-rune sequences, one 0-terminated rune string per row.
 * Nmulti is defined outside this file's visible part (presumably
 * dict.h), so the row order presumably has to match the codes
 * declared there -- confirm before reordering.
 */
Rune multitab[Nmulti][5] = {
	{0x2bd, 0x3b1, 0},		/* U+02BD + Greek alpha */
	{0x2bc, 0x3b1, 0},		/* U+02BC + Greek alpha */
	{0x61, 0x6e, 0x64, 0},		/* "and" */
	{0x61, 0x2f, 0x71, 0},		/* "a/q" */
	{0x3c, 0x7c, 0},		/* "<|" */
	{0x2e, 0x2e, 0},		/* ".." */
	{0x2e, 0x2e, 0x2e, 0},		/* "..." */
	{0x2bd, 0x3b5, 0},		/* U+02BD + Greek epsilon */
	{0x2bc, 0x3b5, 0},		/* U+02BC + Greek epsilon */
	{0x2014, 0x2014, 0},		/* two em dashes */
	{0x2bd, 0x3b7, 0},		/* U+02BD + Greek eta */
	{0x2bc, 0x3b7, 0},		/* U+02BC + Greek eta */
	{0x2bd, 0x3b9, 0},		/* U+02BD + Greek iota */
	{0x2bc, 0x3b9, 0},		/* U+02BC + Greek iota */
	{0x63, 0x74, 0},		/* "ct" */
	{0x66, 0x66, 0},		/* "ff" */
	{0x66, 0x66, 0x69, 0},		/* "ffi" */
	{0x66, 0x66, 0x6c, 0},		/* "ffl" */
	{0x66, 0x6c, 0},		/* "fl" */
	{0x66, 0x69, 0},		/* "fi" */
	{0x26b, 0x26b, 0},		/* two U+026B (l with middle tilde) */
	{0x73, 0x74, 0},		/* "st" */
	{0x2bd, 0x3bf, 0},		/* U+02BD + Greek omicron */
	{0x2bc, 0x3bf, 0},		/* U+02BC + Greek omicron */
	{0x6f, 0x72, 0},		/* "or" */
	{0x2bd, 0x3c1, 0},		/* U+02BD + Greek rho */
	{0x2bc, 0x3c1, 0},		/* U+02BC + Greek rho */
	{0x7e, 0x7e, 0},		/* "~~" */
	{0x2bd, 0x3c5, 0},		/* U+02BD + Greek upsilon */
	{0x2bc, 0x3c5, 0},		/* U+02BC + Greek upsilon */
	{0x2bd, 0x3c9, 0},		/* U+02BD + Greek omega */
	{0x2bc, 0x3c9, 0},		/* U+02BC + Greek omega */
	{0x6f, 0x65, 0},		/* "oe" */
	{0x20, 0x20, 0},		/* two spaces */
};
|
|
|
|
|
|
|
|
|
|
|
|
/* true when r is an ASCII upper-case letter, 0x41 ('A') through 0x5a ('Z') */
#define	risupper(r)	((r) >= 0x41 && (r) <= 0x5a)
/* true when r lies in 0xC0..0xFF, the range latin_fold_tab covers */
#define	rislatin1(r)	((r) >= 0xC0 && (r) <= 0xFF)
/* ASCII upper case to lower case; only meaningful when risupper(r) holds */
#define	rtolower(r)	((r)+('a'-'A'))
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Folding table for the Latin-1 letters, indexed by (rune - 0xc0).
 * Each slot holds the unaccented lower-case ASCII letter for the
 * rune, or 0 when the rune is left alone.  The comment on each row
 * shows the eight runes that row covers.
 */
static Rune latin_fold_tab[] = {
	/* 0xc0: À Á Â Ã Ä Å Æ Ç */
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	/* 0xc8: È É Ê Ë Ì Í Î Ï */
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	/* 0xd0: Ð Ñ Ò Ó Ô Õ Ö × */
	'd', 'n', 'o', 'o', 'o', 'o', 'o', 0,
	/* 0xd8: Ø Ù Ú Û Ü Ý Þ ß */
	'o', 'u', 'u', 'u', 'u', 'y', 0, 0,
	/* 0xe0: à á â ã ä å æ ç */
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	/* 0xe8: è é ê ë ì í î ï */
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	/* 0xf0: ð ñ ò ó ô õ ö ÷ */
	'd', 'n', 'o', 'o', 'o', 'o', 'o', 0,
	/* 0xf8: ø ù ú û ü ý þ ÿ */
	'o', 'u', 'u', 'u', 'u', 'y', 0, 'y',
};
|
|
|
|
|
|
|
|
|
|
|
|
/* translation table stack used by changett() */
static Rune	*ttabstack[20];	/* saved tables */
static int	ntt;		/* number of entries currently on ttabstack */
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* tab is an array of n Assoc's, sorted by key.
|
|
|
|
|
|
* Look for key in tab, and return corresponding val
|
|
|
|
|
|
* or -1 if not there
|
|
|
|
|
|
*/
|
|
|
|
|
|
long
|
|
|
|
|
|
lookassoc(Assoc *tab, int n, char *key)
|
|
|
|
|
|
{
|
|
|
|
|
|
Assoc *q;
|
|
|
|
|
|
long i, low, high;
|
|
|
|
|
|
int r;
|
|
|
|
|
|
|
|
|
|
|
|
for(low = -1, high = n; high > low+1; ){
|
|
|
|
|
|
i = (high+low)/2;
|
|
|
|
|
|
q = &tab[i];
|
|
|
|
|
|
if((r=strcmp(key, q->key))<0)
|
|
|
|
|
|
high = i;
|
|
|
|
|
|
else if(r == 0)
|
|
|
|
|
|
return q->val;
|
|
|
|
|
|
else
|
|
|
|
|
|
low=i;
|
|
|
|
|
|
}
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
long
|
|
|
|
|
|
looknassoc(Nassoc *tab, int n, long key)
|
|
|
|
|
|
{
|
|
|
|
|
|
Nassoc *q;
|
|
|
|
|
|
long i, low, high;
|
|
|
|
|
|
|
|
|
|
|
|
for(low = -1, high = n; high > low+1; ){
|
|
|
|
|
|
i = (high+low)/2;
|
|
|
|
|
|
q = &tab[i];
|
|
|
|
|
|
if(key < q->key)
|
|
|
|
|
|
high = i;
|
|
|
|
|
|
else if(key == q->key)
|
|
|
|
|
|
return q->val;
|
|
|
|
|
|
else
|
|
|
|
|
|
low=i;
|
|
|
|
|
|
}
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
err(char *fmt, ...)
|
|
|
|
|
|
{
|
|
|
|
|
|
char buf[1000];
|
|
|
|
|
|
va_list v;
|
|
|
|
|
|
|
|
|
|
|
|
va_start(v, fmt);
|
|
|
|
|
|
vsnprint(buf, sizeof(buf), fmt, v);
|
|
|
|
|
|
va_end(v);
|
|
|
|
|
|
fprint(2, "%s: %s\n", argv0, buf);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Write the rune r to bout, keeping track of line length
|
|
|
|
|
|
* and breaking the lines (at blanks) when they get too long
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
outrune(long r)
|
|
|
|
|
|
{
|
|
|
|
|
|
if(outinhibit)
|
|
|
|
|
|
return;
|
|
|
|
|
|
if(++linelen > breaklen && r == 0x20) {
|
|
|
|
|
|
Bputc(bout, '\n');
|
|
|
|
|
|
linelen = 0;
|
|
|
|
|
|
} else
|
|
|
|
|
|
Bputrune(bout, r);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
|
outrunes(Rune *rp)
|
|
|
|
|
|
{
|
|
|
|
|
|
Rune r;
|
|
|
|
|
|
|
|
|
|
|
|
while((r = *rp++) != 0)
|
|
|
|
|
|
outrune(r);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* like outrune, but when arg is know to be a char */
|
|
|
|
|
|
void
|
|
|
|
|
|
outchar(int c)
|
|
|
|
|
|
{
|
|
|
|
|
|
if(outinhibit)
|
|
|
|
|
|
return;
|
|
|
|
|
|
if(++linelen > breaklen && c == ' ') {
|
|
|
|
|
|
c ='\n';
|
|
|
|
|
|
linelen = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
Bputc(bout, c);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Write each byte of the NUL-terminated string s with outchar.
 */
void
outchars(char *s)
{
	for(; *s != 0; s++)
		outchar(*s);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Format print-style into a 1000-byte buffer and send the
 * result through outchars, so the usual line breaking applies.
 */
void
outprint(char *fmt, ...)
{
	va_list args;
	char text[1000];

	va_start(args, fmt);
	vsnprint(text, sizeof text, fmt, args);
	va_end(args);
	outchars(text);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Write the bytes in [b, e) with outchar, turning each newline
 * into a blank and squeezing runs of blanks down to one.
 */
void
outpiece(char *b, char *e)
{
	char *p;
	int ch, prev;

	prev = 0;
	for(p = b; p < e; p++){
		ch = *p;
		if(ch == '\n')
			ch = ' ';
		/* drop a blank that directly follows another blank */
		if(ch != ' ' || prev != ' ')
			outchar(ch);
		prev = ch;
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Go to new line if not already there; indent if ind != 0.
|
|
|
|
|
|
* If ind > 1, leave a blank line too.
|
|
|
|
|
|
* Slight hack: assume if current line is only one or two
|
|
|
|
|
|
* characters long, then they were spaces.
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
outnl(int ind)
|
|
|
|
|
|
{
|
|
|
|
|
|
if(outinhibit)
|
|
|
|
|
|
return;
|
|
|
|
|
|
if(ind) {
|
|
|
|
|
|
if(ind > 1) {
|
|
|
|
|
|
if(linelen > 2)
|
|
|
|
|
|
Bputc(bout, '\n');
|
|
|
|
|
|
Bprint(bout, "\n ");
|
|
|
|
|
|
} else if(linelen == 0)
|
|
|
|
|
|
Bprint(bout, " ");
|
|
|
|
|
|
else if(linelen == 1)
|
|
|
|
|
|
Bputc(bout, ' ');
|
|
|
|
|
|
else if(linelen != 2)
|
|
|
|
|
|
Bprint(bout, "\n ");
|
|
|
|
|
|
linelen = 2;
|
|
|
|
|
|
} else {
|
|
|
|
|
|
if(linelen) {
|
|
|
|
|
|
Bputc(bout, '\n');
|
|
|
|
|
|
linelen = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Fold the runes in null-terminated rp.
|
|
|
|
|
|
* Use the sort(1) definition of folding (uppercase to lowercase,
|
|
|
|
|
|
* latin1-accented characters to corresponding unaccented chars)
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
fold(Rune *rp)
|
|
|
|
|
|
{
|
|
|
|
|
|
Rune r;
|
|
|
|
|
|
|
|
|
|
|
|
while((r = *rp) != 0) {
|
|
|
|
|
|
if (rislatin1(r) && latin_fold_tab[r-0xc0])
|
|
|
|
|
|
r = latin_fold_tab[r-0xc0];
|
|
|
|
|
|
if(risupper(r))
|
|
|
|
|
|
r = rtolower(r);
|
|
|
|
|
|
*rp++ = r;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Like fold, but put folded result into new
|
|
|
|
|
|
* (assumed to have enough space).
|
|
|
|
|
|
* old is a regular expression, but we know that
|
|
|
|
|
|
* metacharacters aren't affected
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
foldre(char *new, char *old)
|
|
|
|
|
|
{
|
|
|
|
|
|
Rune r;
|
|
|
|
|
|
|
|
|
|
|
|
while(*old) {
|
|
|
|
|
|
old += chartorune(&r, old);
|
|
|
|
|
|
if (rislatin1(r) && latin_fold_tab[r-0xc0])
|
|
|
|
|
|
r = latin_fold_tab[r-0xc0];
|
|
|
|
|
|
if(risupper(r))
|
|
|
|
|
|
r = rtolower(r);
|
|
|
|
|
|
new += runetochar(new, &r);
|
|
|
|
|
|
}
|
|
|
|
|
|
*new = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* acomp(s, t) returns:
|
|
|
|
|
|
* -2 if s strictly precedes t
|
|
|
|
|
|
* -1 if s is a prefix of t
|
|
|
|
|
|
* 0 if s is the same as t
|
|
|
|
|
|
* 1 if t is a prefix of s
|
|
|
|
|
|
* 2 if t strictly precedes s
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
|
acomp(Rune *s, Rune *t)
|
|
|
|
|
|
{
|
|
|
|
|
|
int cs, ct;
|
|
|
|
|
|
|
|
|
|
|
|
for(;;) {
|
|
|
|
|
|
cs = *s;
|
|
|
|
|
|
ct = *t;
|
|
|
|
|
|
if(cs != ct)
|
|
|
|
|
|
break;
|
|
|
|
|
|
if(cs == 0)
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
s++;
|
|
|
|
|
|
t++;
|
|
|
|
|
|
}
|
|
|
|
|
|
if(cs == 0)
|
|
|
|
|
|
return -1;
|
|
|
|
|
|
if(ct == 0)
|
|
|
|
|
|
return 1;
|
|
|
|
|
|
if(cs < ct)
|
|
|
|
|
|
return -2;
|
|
|
|
|
|
return 2;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Copy null terminated Runes from 'from' to 'to'.
|
|
|
|
|
|
*/
|
|
|
|
|
|
void
|
|
|
|
|
|
runescpy(Rune *to, Rune *from)
|
|
|
|
|
|
{
|
|
|
|
|
|
while((*to++ = *from++) != 0)
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Conversion of unsigned number to long, no overflow detection
|
|
|
|
|
|
*/
|
|
|
|
|
|
long
|
|
|
|
|
|
runetol(Rune *r)
|
|
|
|
|
|
{
|
|
|
|
|
|
int c;
|
|
|
|
|
|
long n;
|
|
|
|
|
|
|
|
|
|
|
|
n = 0;
|
|
|
|
|
|
for(;; r++){
|
|
|
|
|
|
c = *r;
|
|
|
|
|
|
if(0x30<=c && c<=0x39)
|
|
|
|
|
|
c -= '0';
|
|
|
|
|
|
else
|
|
|
|
|
|
break;
|
|
|
|
|
|
n = n*10 + c;
|
|
|
|
|
|
}
|
|
|
|
|
|
return n;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* See if there is a rune corresponding to the accented
|
|
|
|
|
|
* version of r with accent acc (acc in [LIGS..LIGE-1]),
|
|
|
|
|
|
* and return it if so, else return NONE.
|
|
|
|
|
|
*/
|
|
|
|
|
|
Rune
|
|
|
|
|
|
liglookup(Rune acc, Rune r)
|
|
|
|
|
|
{
|
|
|
|
|
|
Rune *p;
|
|
|
|
|
|
|
|
|
|
|
|
if(acc < LIGS || acc >= LIGE)
|
|
|
|
|
|
return NONE;
|
|
|
|
|
|
for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
|
|
|
|
|
|
if(*p == r)
|
|
|
|
|
|
return *(p+1);
|
|
|
|
|
|
return NONE;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
* Maintain a translation table stack (a translation table
|
|
|
|
|
|
* is an array of Runes indexed by bytes or 7-bit bytes).
|
|
|
|
|
|
* If starting is true, push the curtab onto the stack
|
|
|
|
|
|
* and return newtab; else pop the top of the stack and
|
|
|
|
|
|
* return it.
|
|
|
|
|
|
* If curtab is 0, initialize the stack and return.
|
|
|
|
|
|
*/
|
|
|
|
|
|
Rune *
|
|
|
|
|
|
changett(Rune *curtab, Rune *newtab, int starting)
|
|
|
|
|
|
{
|
|
|
|
|
|
if(curtab == 0) {
|
|
|
|
|
|
ntt = 0;
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
if(starting) {
|
|
|
|
|
|
if(ntt >= asize(ttabstack)) {
|
|
|
|
|
|
if(debug)
|
|
|
|
|
|
err("translation stack overflow");
|
|
|
|
|
|
return curtab;
|
|
|
|
|
|
}
|
|
|
|
|
|
ttabstack[ntt++] = curtab;
|
|
|
|
|
|
return newtab;
|
|
|
|
|
|
} else {
|
|
|
|
|
|
if(ntt == 0) {
|
|
|
|
|
|
if(debug)
|
|
|
|
|
|
err("translation stack underflow");
|
|
|
|
|
|
return curtab;
|
|
|
|
|
|
}
|
|
|
|
|
|
return ttabstack[--ntt];
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|