Initial revision

This commit is contained in:
rsc 2003-09-30 17:47:41 +00:00
parent 5f7d5e8d18
commit b2cfc4e2e7
242 changed files with 18177 additions and 0 deletions

13
src/libutf/LICENSE Normal file
View file

@ -0,0 +1,13 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 1998-2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/

View file

@ -0,0 +1,6 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
# Tools first, then suffixes, then flags.
CC=gcc
AR=ar
ARFLAGS=rvc
# O is the object-file suffix.
O=o
NAN=nan64.$(O)
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I${PREFIX}/include

View file

@ -0,0 +1,7 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=gcc
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include
# O is the object-file suffix.
O=o
AR=ar
ARFLAGS=rvc
# default, can be overridden by Make.$(SYSNAME)
# (Assigned once, with the comment on its own line: a trailing comment on
# the assignment would leave trailing whitespace in the value.)
NAN=nan64.$O

View file

@ -0,0 +1,6 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=cc
AR=ar
ARFLAGS=rvc
# O is the object-file suffix.
O=o
NAN=nan64.$(O)
# Unlike the other fragments, CFLAGS is assigned outright here rather than
# appended to.  -c lives in CFLAGS; the compile rules do not add one.
CFLAGS=-O -c -Ae -I.

View file

@ -0,0 +1,7 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=gcc
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I.
# O is the object-file suffix.
O=o
AR=ar
ARFLAGS=rvc
# default, can be overridden by Make.$(SYSNAME)
# (Assigned once, with the comment on its own line: a trailing comment on
# the assignment would leave trailing whitespace in the value.)
NAN=nan64.$O

View file

@ -0,0 +1,7 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=gcc
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include
# O is the object-file suffix.
O=o
AR=ar
ARFLAGS=rvc
# default, can be overridden by Make.$(SYSNAME)
# (Assigned once, with the comment on its own line: a trailing comment on
# the assignment would leave trailing whitespace in the value.)
NAN=nan64.$O

View file

@ -0,0 +1,6 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=cc
AR=ar
ARFLAGS=rvc
# O is the object-file suffix.
O=o
NAN=nan64.$(O)
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-g -c -I.

View file

@ -0,0 +1,2 @@
# Dispatch on the compiler: $(CC) picks the compiler-specific fragment
# (the mkfile lists Make.SunOS-sun4u-cc and Make.SunOS-sun4u-gcc).
# NOTE(review): CC must already be set when this line is read (environment,
# command line or make's built-in default) -- confirm for the makes in use.
include Make.SunOS-sun4u-$(CC)
# Assigned after the include, so this value wins over the included one.
NAN=nan64.$O

View file

@ -0,0 +1,6 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=cc
AR=ar
ARFLAGS=rvc
# O is the object-file suffix.
O=o
NAN=nan64.$(O)
# -c lives in CFLAGS; the compile rules do not add one themselves.
CFLAGS+=-g -c -I. -O

View file

@ -0,0 +1,6 @@
# Per-platform build settings (one of the Make.$(SYSNAME)-$(OBJTYPE) fragments).
CC=gcc
AR=ar
ARFLAGS=rvc
# O is the object-file suffix.
O=o
NAN=nan64.$(O)
# -c lives in CFLAGS; the compile rules do not add one themselves.
# No -I options here, unlike the other gcc fragments.
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c

112
src/libutf/Makefile Normal file
View file

@ -0,0 +1,112 @@
# Work out the host OS (SYSNAME) and CPU type (OBJTYPE) so that the matching
# per-platform fragment Make.$(SYSNAME)-$(OBJTYPE) can be included below.
# Both a GNU-make and a BSD-make spelling of the assignment are given.
# this works in gnu make
SYSNAME:=${shell uname}
# The sed script rewrites i.86 to 386, drops everything from the first '/'
# onward, and removes all spaces.
OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'}
# this works in bsd make
SYSNAME!=uname
OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'
# the gnu rules will mess up bsd but not vice versa,
# hence the gnu rules come first.
include Make.$(SYSNAME)-$(OBJTYPE)
# Installation root used by the install target and by -I$(PREFIX)/include.
PREFIX=/usr/local
# Extra files for the nuke target to delete; appended to further down.
NUKEFILES=
# Extra files for the tgz target to copy into the distribution directory.
TGZFILES=
# Package identity.  NAME and VERSION form the tarball name, PORTPLACE is
# the directory below /usr/ports used by the ports target, LIB is the
# archive built by default.
NAME=libutf
VERSION=2.0
PORTPLACE=devel/libutf
LIB=libutf.a

# Objects archived into $(LIB); $(O) is the platform's object suffix.
OFILES=\
	rune.$(O)\
	runestrcat.$(O)\
	runestrchr.$(O)\
	runestrcmp.$(O)\
	runestrcpy.$(O)\
	runestrdup.$(O)\
	runestrlen.$(O)\
	runestrecpy.$(O)\
	runestrncat.$(O)\
	runestrncmp.$(O)\
	runestrncpy.$(O)\
	runestrrchr.$(O)\
	runestrstr.$(O)\
	runetype.$(O)\
	utfecpy.$(O)\
	utflen.$(O)\
	utfnlen.$(O)\
	utfrrune.$(O)\
	utfrune.$(O)\
	utfutf.$(O)

# Headers that every object depends on.
HFILES=\
	utf.h

# Default target: build the static library.
all: $(LIB)

# Install the library, its header and the manual pages below $(PREFIX).
# All four destination directories are created up front (mkdir -p is a
# no-op for ones that exist); previously only man/man3 was created and the
# install failed on a PREFIX without man/man7, include or lib.
install: $(LIB)
	mkdir -p $(PREFIX)/man/man3 $(PREFIX)/man/man7 $(PREFIX)/include $(PREFIX)/lib
	install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3
	install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7
	install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3
	install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3
	install -c -m 0644 utf.h $(PREFIX)/include/utf.h
	install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)

# Archive every object into the static library.
$(LIB): $(OFILES)
	$(AR) $(ARFLAGS) $@ $(OFILES)

# The library is a build product, so nuke removes it as well.
NUKEFILES+=$(LIB)

# Compile one C file into an object.  The same recipe is given both as a
# suffix rule and as a pattern rule (presumably so that either make dialect
# finds one it understands -- confirm).  CFLAGS already contains -c.
.c.$O:
	$(CC) $(CFLAGS) -I$(PREFIX)/include $<

%.$O: %.c
	$(CC) $(CFLAGS) -I$(PREFIX)/include $<

# Rebuild every object when a header changes.
$(OFILES): $(HFILES)

# Build $(NAME)-$(VERSION).tgz: stage the distribution files in a scratch
# directory of the same name, tar and gzip it, then remove the scratch copy.
tgz:
	rm -rf $(NAME)-$(VERSION)
	mkdir $(NAME)-$(VERSION)
	cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION)
	tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz
	rm -rf $(NAME)-$(VERSION)

# Remove the objects and the library.
clean:
	rm -f $(OFILES) $(LIB)

# Like clean, and also remove packaging output and everything in NUKEFILES.
nuke:
	rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES)

# Build source and binary RPMs from a fresh tarball and upload them.
# The tarball is made with $(MAKE) rather than a literal "make" so that the
# sub-make is the same program (and gets the same flags) as the one
# currently running this Makefile.
rpm:
	$(MAKE) tgz
	cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES
	rpm -ba rpm.spec
	cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm .
	cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm .
	scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software

# Directory of this package inside the ports tree.
PORTDIR=/usr/ports/$(PORTPLACE)

# Build a ports-tree entry from bundle.ports, check it, and publish it.
#  - The tarball is made with $(MAKE) so the sub-make matches the running one.
#  - The make invocations inside $(PORTDIR) stay a literal "make": they run
#    the port's own Makefile, which uses .include and therefore needs the
#    system make regardless of which make is running this file.
#  - Refuse to continue when PORTPLACE is empty, because the next step is
#    rm -rf $(PORTDIR), which would then be /usr/ports/ itself.
#  - "cd dir && cmd" so that a failed cd never runs cmd in the wrong place.
# The awk script splits bundle.ports at "--- name ---" marker lines and
# writes the lines that follow each marker to the file called name.
ports:
	$(MAKE) tgz
	test -n "$(PORTPLACE)"
	rm -rf $(PORTDIR)
	mkdir $(PORTDIR)
	cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles
	cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}')
	(cd $(PORTDIR) && make makesum)
	(cd $(PORTDIR) && make)
	(cd $(PORTDIR) && /usr/local/bin/portlint)
	rm -rf $(PORTDIR)/work
	shar `find $(PORTDIR)` > ports.shar
	(cd $(PORTDIR) && tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz
	scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software

# These targets name actions, not files.  The special target is spelled in
# upper case; a lower-case ".phony" is just an ordinary target and leaves
# e.g. "make install" at the mercy of a file called "install".
.PHONY: all clean nuke install tgz rpm ports

47
src/libutf/Makefile.BOT Normal file
View file

@ -0,0 +1,47 @@
# Compile one C file into an object.  The same recipe is given both as a
# suffix rule and as a pattern rule (presumably so that either make dialect
# finds one it understands -- confirm).  CFLAGS already contains -c.
.c.$O:
	$(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $<

%.$O: %.c
	$(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $<

# Rebuild every object when a header changes.
$(OFILES): $(HFILES)

# Build $(NAME)-$(VERSION).tgz: stage the distribution files in a scratch
# directory of the same name, tar and gzip it, then remove the scratch copy.
tgz:
	rm -rf $(NAME)-$(VERSION)
	mkdir $(NAME)-$(VERSION)
	cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION)
	tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz
	rm -rf $(NAME)-$(VERSION)

# Remove the objects and the library.
clean:
	rm -f $(OFILES) $(LIB)

# Like clean, and also remove packaging output and everything in NUKEFILES.
nuke:
	rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES)

# Build source and binary RPMs from a fresh tarball and upload them.
# The tarball is made with $(MAKE) rather than a literal "make" so that the
# sub-make is the same program (and gets the same flags) as the one
# currently running this Makefile.
rpm:
	$(MAKE) tgz
	cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES
	rpm -ba rpm.spec
	cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm .
	cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm .
	scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software

# Directory of this package inside the ports tree.
PORTDIR=/usr/ports/$(PORTPLACE)

# Build a ports-tree entry from bundle.ports, check it, and publish it.
#  - The tarball is made with $(MAKE) so the sub-make matches the running one.
#  - The make invocations inside $(PORTDIR) stay a literal "make": they run
#    the port's own Makefile, which uses .include and therefore needs the
#    system make regardless of which make is running this file.
#  - Refuse to continue when PORTPLACE is empty, because the next step is
#    rm -rf $(PORTDIR), which would then be /usr/ports/ itself.
#  - "cd dir && cmd" so that a failed cd never runs cmd in the wrong place.
# The awk script splits bundle.ports at "--- name ---" marker lines and
# writes the lines that follow each marker to the file called name.
ports:
	$(MAKE) tgz
	test -n "$(PORTPLACE)"
	rm -rf $(PORTDIR)
	mkdir $(PORTDIR)
	cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles
	cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}')
	(cd $(PORTDIR) && make makesum)
	(cd $(PORTDIR) && make)
	(cd $(PORTDIR) && /usr/local/bin/portlint)
	rm -rf $(PORTDIR)/work
	shar `find $(PORTDIR)` > ports.shar
	(cd $(PORTDIR) && tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz
	scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software

# These targets name actions, not files.  The special target is spelled in
# upper case; a lower-case ".phony" is just an ordinary target and leaves
# e.g. "make install" at the mercy of a file called "install".
.PHONY: all clean nuke install tgz rpm ports

5
src/libutf/Makefile.CMD Normal file
View file

@ -0,0 +1,5 @@
# Link the program $(TARG) from its objects, against the libraries
# installed under $(PREFIX)/lib and X11 from /usr/X11R6/lib.
$(TARG): $(OFILES)
	$(CC) -o $@ $(OFILES) -L$(PREFIX)/lib -lframe -ldraw -lthread -l9 -lregexp9 -lbio -lfmt -lutf -L/usr/X11R6/lib -lX11 -lm

4
src/libutf/Makefile.LIB Normal file
View file

@ -0,0 +1,4 @@
# Archive every object into the static library.
$(LIB): $(OFILES)
	$(AR) $(ARFLAGS) $@ $(OFILES)

# The library is a build product, so nuke removes it as well.
NUKEFILES+=$(LIB)

41
src/libutf/Makefile.MID Normal file
View file

@ -0,0 +1,41 @@
# Package identity.  NAME and VERSION form the tarball name, PORTPLACE is
# the directory below /usr/ports used by the ports target, LIB is the
# archive built by default.
NAME=libutf
VERSION=2.0
PORTPLACE=devel/libutf
LIB=libutf.a

# Objects archived into $(LIB); $(O) is the platform's object suffix.
OFILES=\
	rune.$(O)\
	runestrcat.$(O)\
	runestrchr.$(O)\
	runestrcmp.$(O)\
	runestrcpy.$(O)\
	runestrdup.$(O)\
	runestrlen.$(O)\
	runestrecpy.$(O)\
	runestrncat.$(O)\
	runestrncmp.$(O)\
	runestrncpy.$(O)\
	runestrrchr.$(O)\
	runestrstr.$(O)\
	runetype.$(O)\
	utfecpy.$(O)\
	utflen.$(O)\
	utfnlen.$(O)\
	utfrrune.$(O)\
	utfrune.$(O)\
	utfutf.$(O)

# Headers that every object depends on.
HFILES=\
	utf.h

# Default target: build the static library.
all: $(LIB)

# Install the library, its header and the manual pages below $(PREFIX).
# All four destination directories are created up front (mkdir -p is a
# no-op for ones that exist); previously only man/man3 was created and the
# install failed on a PREFIX without man/man7, include or lib.
install: $(LIB)
	mkdir -p $(PREFIX)/man/man3 $(PREFIX)/man/man7 $(PREFIX)/include $(PREFIX)/lib
	install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3
	install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7
	install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3
	install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3
	install -c -m 0644 utf.h $(PREFIX)/include/utf.h
	install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)

20
src/libutf/Makefile.TOP Normal file
View file

@ -0,0 +1,20 @@
# Work out the host OS (SYSNAME) and CPU type (OBJTYPE) so that the matching
# per-platform fragment Make.$(SYSNAME)-$(OBJTYPE) can be included below.
# Both a GNU-make and a BSD-make spelling of the assignment are given.
# this works in gnu make
SYSNAME:=${shell uname}
# The sed script rewrites i.86 to 386, drops everything from the first '/'
# onward, and removes all spaces.
OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'}
# this works in bsd make
SYSNAME!=uname
OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'
# the gnu rules will mess up bsd but not vice versa,
# hence the gnu rules come first.
include Make.$(SYSNAME)-$(OBJTYPE)
# Installation root; also referenced as -I$(PREFIX)/include when compiling.
PREFIX=/usr/local
# Extra files for the nuke target to delete; later fragments append to it.
NUKEFILES=
# Extra files for the tgz target to copy into the distribution directory.
TGZFILES=

13
src/libutf/NOTICE Normal file
View file

@ -0,0 +1,13 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 1998-2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/

13
src/libutf/README Normal file
View file

@ -0,0 +1,13 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 1998-2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/

43
src/libutf/bundle.ports Normal file
View file

@ -0,0 +1,43 @@
--- Makefile ---
# New ports collection makefile for: libutf
# Date Created: 11 Feb 2003
# Whom: rsc
#
# THIS LINE NEEDS REPLACING. IT'S HERE TO GET BY PORTLINT
# $FreeBSD: ports/devel/libfmt/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $
PORTNAME= libutf
PORTVERSION= 2.0
CATEGORIES= devel
MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/
EXTRACT_SUFX= .tgz
MAINTAINER= rsc@post.harvard.edu
MAN3= rune.3 runestrcat.3 isalpharune.3
MAN7= utf.7
USE_REINPLACE=yes
.include <bsd.port.pre.mk>
post-patch:
${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile
.include <bsd.port.post.mk>
--- pkg-comment ---
UTF8 support library from Plan 9
--- pkg-descr ---
UTF8 support library from Plan 9.
WWW: http://pdos.lcs.mit.edu/~rsc/software/#libutf
http://plan9.bell-labs.com/magic/man2html/3/rune
Russ Cox
rsc@post.harvard.edu
--- pkg-plist ---
lib/libutf.a
include/utf.h
--- /dev/null ---
This is just a way to make sure blank lines don't
creep into pkg-plist.

47
src/libutf/isalpharune.3 Normal file
View file

@ -0,0 +1,47 @@
.TH ISALPHARUNE 3
.SH NAME
isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases
.SH SYNOPSIS
.B #include <utf.h>
.PP
.B
int isalpharune(Rune c)
.PP
.B
int islowerrune(Rune c)
.PP
.B
int isspacerune(Rune c)
.PP
.B
int istitlerune(Rune c)
.PP
.B
int isupperrune(Rune c)
.PP
.B
Rune tolowerrune(Rune c)
.PP
.B
Rune totitlerune(Rune c)
.PP
.B
Rune toupperrune(Rune c)
.SH DESCRIPTION
These routines examine and operate on Unicode characters,
in particular a subset of their properties as defined in the Unicode standard.
Unicode defines some characters as alphabetic and specifies three cases:
upper, lower, and title.
Analogously to
.IR ctype (3)
for
.SM ASCII\c
,
these routines
test types and modify cases for Unicode characters.
The names are self-explanatory.
.PP
The case-conversion routines return the character unchanged if it has no case.
.SH "SEE ALSO"
.IR ctype (3) ,
.IR "The Unicode Standard" .

17
src/libutf/lib9.h Normal file
View file

@ -0,0 +1,17 @@
#include <string.h>
#include "utf.h"
/* Plan 9 spelling of the null pointer. */
#define nil ((void*)0)
/*
 * The short Plan 9 type names are macros for _fmt-prefixed identifiers, so
 * the typedefs below really declare _fmtuchar, _fmtushort, and so on.
 * NOTE(review): presumably this keeps them from clashing with system
 * headers that already declare uchar/ushort/uint/ulong -- confirm.
 */
#define uchar _fmtuchar
#define ushort _fmtushort
#define uint _fmtuint
#define ulong _fmtulong
/*
 * NOTE(review): vlong and uvlong are renamed here but no typedef for
 * either is visible in this header.
 */
#define vlong _fmtvlong
#define uvlong _fmtuvlong
/* Must follow the #defines above; see the comment there. */
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;

9
src/libutf/mkfile Normal file
View file

@ -0,0 +1,9 @@
# Plan 9 mk rules that (re)generate the portable Makefile and refresh the
# per-platform Make.* fragments.
# all is a virtual target (V attribute): it names no file.
all:V: Makefile Make.FreeBSD-386 Make.Linux-386 Make.NetBSD-386 Make.HP-UX-9000 Make.OSF1-alpha \
Make.SunOS-sun4u Make.SunOS-sun4u-cc Make.SunOS-sun4u-gcc \
Make.Darwin-PowerMacintosh
# Makefile is the concatenation, in this order, of the shared TOP fragment,
# this directory's MID fragment, and the shared LIB and BOT fragments.
# The D attribute deletes the target if the recipe fails, so a partial
# Makefile is never left behind.
Makefile:D: ../libutf/Makefile.TOP Makefile.MID ../libutf/Makefile.LIB ../libutf/Makefile.BOT
cat $prereq >$target
# Each per-platform fragment is copied from ../libutf.
# NOTE(review): in this directory ../libutf/Make.% is the target itself;
# presumably the mkfile is shared with sibling libraries -- confirm.
Make.%: ../libutf/Make.%
cp $prereq $target

28
src/libutf/rpm.spec Normal file
View file

@ -0,0 +1,28 @@
# RPM packaging for libutf; built by the rpm target of the Makefile from
# libutf-2.0.tgz.  Name and Version must match NAME and VERSION there.
Summary: Port of Plan 9's UTF8 support functions
Name: libutf
Version: 2.0
Release: 1
Group: Development/C
# NOTE(review): the LICENSE file shipped with the sources is a Lucent
# copyright notice with a permission grant, which is not the same thing as
# "Public Domain" -- confirm that this tag is what is intended.
Copyright: Public Domain
Packager: Russ Cox <rsc@post.harvard.edu>
Source: http://pdos.lcs.mit.edu/~rsc/software/libutf-2.0.tgz
URL: http://pdos.lcs.mit.edu/~rsc/software/#libutf
%description
Libutf is a port of Plan 9's UTF8 support functions.
%prep
%setup
%build
make
%install
make install
%files
/usr/local/include/utf.h
/usr/local/lib/libutf.a
/usr/local/man/man3/runestrcat.3
/usr/local/man/man3/isalpharune.3
/usr/local/man/man3/rune.3
/usr/local/man/man7/utf.7

187
src/libutf/rune.3 Normal file
View file

@ -0,0 +1,187 @@
.TH RUNE 3
.SH NAME
runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
.SH SYNOPSIS
.ta \w'\fLchar*xx'u
.B #include <utf.h>
.PP
.B
int runetochar(char *s, Rune *r)
.PP
.B
int chartorune(Rune *r, char *s)
.PP
.B
int runelen(long r)
.PP
.B
int runenlen(Rune *r, int n)
.PP
.B
int fullrune(char *s, int n)
.PP
.B
char* utfecpy(char *s1, char *es1, char *s2)
.PP
.B
int utflen(char *s)
.PP
.B
int utfnlen(char *s, long n)
.PP
.B
char* utfrune(char *s, long c)
.PP
.B
char* utfrrune(char *s, long c)
.PP
.B
char* utfutf(char *s1, char *s2)
.SH DESCRIPTION
These routines convert to and from a
.SM UTF
byte stream and runes.
.PP
.I Runetochar
copies one rune at
.I r
to at most
.B UTFmax
bytes starting at
.I s
and returns the number of bytes copied.
.BR UTFmax ,
defined as
.B 3
in
.BR <utf.h> ,
is the maximum number of bytes required to represent a rune.
.PP
.I Chartorune
copies at most
.B UTFmax
bytes starting at
.I s
to one rune at
.I r
and returns the number of bytes copied.
If the input is not exactly in
.SM UTF
format,
.I chartorune
will convert to 0x80 and return 1.
.PP
.I Runelen
returns the number of bytes
required to convert
.I r
into
.SM UTF.
.PP
.I Runenlen
returns the number of bytes
required to convert the
.I n
runes pointed to by
.I r
into
.SM UTF.
.PP
.I Fullrune
returns 1 if the string
.I s
of length
.I n
is long enough to be decoded by
.I chartorune
and 0 otherwise.
This does not guarantee that the string
contains a legal
.SM UTF
encoding.
This routine is used by programs that
obtain input a byte at
a time and need to know when a full rune
has arrived.
.PP
The following routines are analogous to the
corresponding string routines with
.B utf
substituted for
.B str
and
.B rune
substituted for
.BR chr .
.PP
.I Utfecpy
copies UTF sequences until a null sequence has been copied, but writes no
sequences beyond
.IR es1 .
If any sequences are copied,
.I s1
is terminated by a null sequence, and a pointer to that sequence is returned.
Otherwise, the original
.I s1
is returned.
.PP
.I Utflen
returns the number of runes that
are represented by the
.SM UTF
string
.IR s .
.PP
.I Utfnlen
returns the number of complete runes that
are represented by the first
.I n
bytes of
.SM UTF
string
.IR s .
If the last few bytes of the string contain an incompletely coded rune,
.I utfnlen
will not count them; in this way, it differs from
.IR utflen ,
which includes every byte of the string.
.PP
.I Utfrune
.RI ( utfrrune )
returns a pointer to the first (last)
occurrence of rune
.I c
in the
.SM UTF
string
.IR s ,
or 0 if
.I c
does not occur in the string.
The NUL byte terminating a string is considered to
be part of the string
.IR s .
.PP
.I Utfutf
returns a pointer to the first occurrence of
the
.SM UTF
string
.I s2
as a
.SM UTF
substring of
.IR s1 ,
or 0 if there is none.
If
.I s2
is the null string,
.I utfutf
returns
.IR s1 .
.SH HISTORY
These routines were written by Rob Pike and Ken Thompson
and first appeared in Plan 9.
.SH SEE ALSO
.IR utf (7),
.IR tcs (1)

177
src/libutf/rune.c Normal file
View file

@ -0,0 +1,177 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Bit layout of the 1- to 3-byte UTF encoding implemented in this file.
 *
 * BitN is the number of payload bits carried by the lead byte of an N-byte
 * sequence; Bitx is the payload of a continuation byte.  TN and Tx are the
 * tag bits of those bytes, i.e. ((1<<(Bit+1))-1) ^ 0xFF.  RuneN is the
 * largest value an N-byte sequence can carry, (1<<(BitN+(N-1)*Bitx))-1.
 */
enum
{
	Bit1	= 7,
	Bitx	= 6,
	Bit2	= 5,
	Bit3	= 4,
	Bit4	= 3,

	T1	= 0x00,		/* 0000 0000 */
	Tx	= 0x80,		/* 1000 0000 */
	T2	= 0xC0,		/* 1100 0000 */
	T3	= 0xE0,		/* 1110 0000 */
	T4	= 0xF0,		/* 1111 0000 */

	Rune1	= 0x7F,		/* 0000 0000 0111 1111 */
	Rune2	= 0x7FF,	/* 0000 0111 1111 1111 */
	Rune3	= 0xFFFF,	/* 1111 1111 1111 1111 */

	Maskx	= 0x3F,		/* 0011 1111: payload of a continuation byte */
	Testx	= 0xC0,		/* 1100 0000: tag bits of a continuation byte */

	Bad	= Runeerror,	/* stored for any input that does not decode */
};
/*
 * Decode the UTF sequence at str into *rune and return the number of
 * bytes consumed (1, 2 or 3).
 *
 * Anything that is not a well-formed, shortest-form sequence of at most
 * three bytes stores Bad in *rune and consumes a single byte.  Bytes after
 * the first are examined only once the preceding byte has been read, in
 * the same order as before: str[1] for every non-ASCII lead byte, str[2]
 * only for lead bytes at or above T3 whose first continuation is valid.
 */
int
chartorune(Rune *rune, char *str)
{
	uchar *p;
	int b0, b1, b2;
	long v;

	p = (uchar*)str;

	/*
	 * one character sequence
	 *	00000-0007F => T1
	 */
	b0 = p[0];
	if(b0 < Tx) {
		*rune = b0;
		return 1;
	}

	/* XOR with Tx leaves only payload bits in a valid continuation byte */
	b1 = p[1] ^ Tx;
	if((b1 & Testx) == 0) {
		if(b0 < T3) {
			/*
			 * two character sequence
			 *	0080-07FF => T2 Tx
			 * (a lead byte below T2 is a stray continuation byte)
			 */
			if(b0 >= T2) {
				v = ((b0 << Bitx) | b1) & Rune2;
				if(v > Rune1) {		/* reject over-long forms */
					*rune = v;
					return 2;
				}
			}
		} else {
			/*
			 * three character sequence
			 *	0800-FFFF => T3 Tx Tx
			 */
			b2 = p[2] ^ Tx;
			if((b2 & Testx) == 0 && b0 < T4) {
				v = ((((b0 << Bitx) | b1) << Bitx) | b2) & Rune3;
				if(v > Rune2) {		/* reject over-long forms */
					*rune = v;
					return 3;
				}
			}
		}
	}

	/*
	 * bad decoding
	 */
	*rune = Bad;
	return 1;
}
/*
 * Encode *rune as UTF into str and return the number of bytes written
 * (1, 2 or 3).  str must have room for three bytes; no terminator is
 * written.
 */
int
runetochar(char *str, Rune *rune)
{
	long r;

	r = *rune;

	/*
	 * three character sequence
	 *	0800-FFFF => T3 Tx Tx
	 */
	if(r > Rune2) {
		str[0] = T3 | (r >> 2*Bitx);
		str[1] = Tx | ((r >> 1*Bitx) & Maskx);
		str[2] = Tx | (r & Maskx);
		return 3;
	}

	/*
	 * two character sequence
	 *	0080-07FF => T2 Tx
	 */
	if(r > Rune1) {
		str[0] = T2 | (r >> 1*Bitx);
		str[1] = Tx | (r & Maskx);
		return 2;
	}

	/*
	 * one character sequence
	 *	00000-0007F => 00-7F
	 */
	str[0] = r;
	return 1;
}
/*
 * Return the number of bytes runetochar produces for c, by encoding it
 * into a scratch buffer.  c is first stored in a Rune, exactly as a caller
 * passing it to runetochar would have to.
 */
int
runelen(long c)
{
	char scratch[10];
	Rune r = c;

	return runetochar(scratch, &r);
}
/*
 * Return the total number of bytes needed to encode the nrune runes
 * starting at r: one, two or three bytes per rune depending on its value.
 */
int
runenlen(Rune *r, int nrune)
{
	int total, ch;

	for(total = 0; nrune--; ) {
		ch = *r++;
		total += ch <= Rune1 ? 1 : ch <= Rune2 ? 2 : 3;
	}
	return total;
}
/*
 * Return 1 if the n bytes at str are enough for chartorune to decode one
 * rune, 0 otherwise.  Only the first byte is inspected: it decides how
 * many bytes are required; the bytes are not checked for validity.
 */
int
fullrune(char *str, int n)
{
	int lead;

	if(n <= 0)
		return 0;

	lead = *(uchar*)str;
	if(lead < Tx)
		return 1;	/* one byte is always enough here */
	if(lead < T3)
		return n > 1;	/* needs two bytes */
	return n > 2;		/* needs three bytes */
}

65
src/libutf/runestrcat.3 Normal file
View file

@ -0,0 +1,65 @@
.TH RUNESTRCAT 3
.SH NAME
runestrcat,
runestrncat,
runestrcmp,
runestrncmp,
runestrcpy,
runestrncpy,
runestrecpy,
runestrlen,
runestrchr,
runestrrchr,
runestrdup,
runestrstr \- rune string operations
.SH SYNOPSIS
.B #include <utf.h>
.PP
.ta \w'\fLRune* \fP'u
.B
Rune* runestrcat(Rune *s1, Rune *s2)
.PP
.B
Rune* runestrncat(Rune *s1, Rune *s2, long n)
.PP
.B
int runestrcmp(Rune *s1, Rune *s2)
.PP
.B
int runestrncmp(Rune *s1, Rune *s2, long n)
.PP
.B
Rune* runestrcpy(Rune *s1, Rune *s2)
.PP
.B
Rune* runestrncpy(Rune *s1, Rune *s2, long n)
.PP
.B
Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2)
.PP
.B
long runestrlen(Rune *s)
.PP
.B
Rune* runestrchr(Rune *s, Rune c)
.PP
.B
Rune* runestrrchr(Rune *s, Rune c)
.PP
.B
Rune* runestrdup(Rune *s)
.PP
.B
Rune* runestrstr(Rune *s1, Rune *s2)
.SH DESCRIPTION
These functions are rune string analogues of
the corresponding functions in
.IR strcat (3).
.SH HISTORY
These routines first appeared in Plan 9.
.SH SEE ALSO
.IR memmove (3),
.IR rune (3),
.IR strcat (3)
.SH BUGS
The outcome of overlapping moves varies among implementations.

25
src/libutf/runestrcat.c Normal file
View file

@ -0,0 +1,25 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Append the rune string s2, terminator included, to the end of s1.
 * Returns s1.
 */
Rune*
runestrcat(Rune *s1, Rune *s2)
{
	Rune *end;

	end = runestrchr(s1, 0);	/* s1's terminator */
	runestrcpy(end, s2);
	return s1;
}

35
src/libutf/runestrchr.c Normal file
View file

@ -0,0 +1,35 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return a pointer to the first occurrence of c in the rune string s,
 * or 0 if there is none.  Searching for 0 finds the terminator.
 */
Rune*
runestrchr(Rune *s, Rune c)
{
	Rune *p;

	for(p = s; *p != c; p++)
		if(*p == 0)
			return 0;	/* hit the end without a match */
	return p;
}

35
src/libutf/runestrcmp.c Normal file
View file

@ -0,0 +1,35 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Compare two rune strings rune by rune.  Returns 0 if they are equal,
 * 1 if s1 has the larger rune at the first difference, -1 otherwise.
 */
int
runestrcmp(Rune *s1, Rune *s2)
{
	for(; *s1 == *s2; s1++, s2++)
		if(*s1 == 0)
			return 0;	/* both ended together */
	return *s1 > *s2 ? 1 : -1;
}

28
src/libutf/runestrcpy.c Normal file
View file

@ -0,0 +1,28 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Copy the rune string s2, terminator included, to s1.  Returns s1.
 */
Rune*
runestrcpy(Rune *s1, Rune *s2)
{
	Rune *dst;

	dst = s1;
	do
		*dst = *s2++;
	while(*dst++ != 0);
	return s1;
}

30
src/libutf/runestrdup.c Normal file
View file

@ -0,0 +1,30 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return a malloc'ed copy of the rune string s, or 0 if malloc fails.
 * The caller owns the returned memory.
 */
Rune*
runestrdup(Rune *s)
{
	Rune *copy;

	copy = malloc(sizeof(Rune)*(runestrlen(s) + 1));	/* +1 for the terminator */
	return copy != 0 ? runestrcpy(copy, s) : 0;
}

32
src/libutf/runestrecpy.c Normal file
View file

@ -0,0 +1,32 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Copy the rune string s2 to s1 without writing at or beyond es1.  If the
 * string does not fit it is truncated; whenever anything is written the
 * result is 0-terminated.
 *
 * Returns a pointer to the terminator in s1, or s1 itself if there was no
 * room at all (s1 >= es1).  Earlier the return value pointed at the
 * terminator only when the copy was truncated and one element past it
 * otherwise; it now points at the terminator in both cases, so calls can
 * be chained: p = runestrecpy(p, e, a); p = runestrecpy(p, e, b);
 */
Rune*
runestrecpy(Rune *s1, Rune *es1, Rune *s2)
{
	if(s1 >= es1)
		return s1;

	/* s1 is advanced only past non-terminator runes */
	while((*s1 = *s2++) != 0){
		if(++s1 == es1){
			/* out of room: turn the last rune written into the terminator */
			*--s1 = '\0';
			break;
		}
	}
	return s1;
}

24
src/libutf/runestrlen.c Normal file
View file

@ -0,0 +1,24 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return the number of runes in s, not counting the terminator.
 */
long
runestrlen(Rune *s)
{
	Rune *end;

	end = runestrchr(s, 0);
	return end - s;
}

32
src/libutf/runestrncat.c Normal file
View file

@ -0,0 +1,32 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Append at most n runes of s2 to the end of s1 and terminate the result.
 * Returns s1.
 */
Rune*
runestrncat(Rune *s1, Rune *s2, long n)
{
	Rune *dst;

	dst = runestrchr(s1, 0);
	for(;;) {
		*dst = *s2++;
		if(*dst == 0)
			break;		/* copied all of s2 */
		dst++;
		if(--n < 0) {
			/* one rune too many: replace it with the terminator */
			dst[-1] = 0;
			break;
		}
	}
	return s1;
}

37
src/libutf/runestrncmp.c Normal file
View file

@ -0,0 +1,37 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Compare at most n runes of s1 and s2.  Returns 0 if they agree up to
 * there (or up to a common terminator), 1 if s1 has the larger rune at the
 * first difference, -1 otherwise.
 */
int
runestrncmp(Rune *s1, Rune *s2, long n)
{
	Rune a, b;

	for(; n > 0; n--) {
		a = *s1++;
		b = *s2++;
		if(a != b)
			return a > b ? 1 : -1;
		if(a == 0)
			return 0;
	}
	return 0;
}

33
src/libutf/runestrncpy.c Normal file
View file

@ -0,0 +1,33 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Copy at most n runes from s2 to s1.  If s2 is shorter than n, the rest
 * of s1 up to n runes is filled with 0; if it is not, s1 is left without a
 * terminator.  Returns s1.
 */
Rune*
runestrncpy(Rune *s1, Rune *s2, long n)
{
	long i;		/* same type as n: an int counter overflows for large n */
	Rune *os1;

	os1 = s1;
	for(i = 0; i < n; i++)
		if((*s1++ = *s2++) == 0) {
			/* source exhausted: zero-fill the remainder */
			while(++i < n)
				*s1++ = 0;
			return os1;
		}
	return os1;
}

30
src/libutf/runestrrchr.c Normal file
View file

@ -0,0 +1,30 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return a pointer to the last occurrence of c in the rune string s, or 0
 * if there is none.  Searching for 0 finds the terminator.
 */
Rune*
runestrrchr(Rune *s, Rune c)
{
	Rune *last, *hit;

	if(c == 0)
		return runestrchr(s, 0);

	/* walk from match to match, remembering the most recent one */
	last = 0;
	for(hit = runestrchr(s, c); hit != 0; hit = runestrchr(hit+1, c))
		last = hit;
	return last;
}

44
src/libutf/runestrstr.c Normal file
View file

@ -0,0 +1,44 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Find the first occurrence of the rune string s2 inside s1.
 * Returns a pointer to the start of the match, s1 itself when
 * s2 is empty, or 0 when there is no match.
 */
Rune*
runestrstr(Rune *s1, Rune *s2)
{
	Rune *cand, *hay, *needle;
	int first;

	first = *s2;
	if(first == 0)
		return s1;
	cand = runestrchr(s1, first);
	while(cand != 0) {
		/* first rune matches at cand; compare the tails */
		hay = cand + 1;
		needle = s2 + 1;
		while(*needle != 0 && *needle == *hay) {
			needle++;
			hay++;
		}
		if(*needle == 0)
			return cand;
		cand = runestrchr(cand+1, first);
	}
	return 0;
}

1152
src/libutf/runetype.c Normal file

File diff suppressed because it is too large Load diff

91
src/libutf/utf.7 Normal file
View file

@ -0,0 +1,91 @@
.TH UTF 7
.SH NAME
UTF, Unicode, ASCII, rune \- character set and format
.SH DESCRIPTION
The Plan 9 character set and representation are
based on the Unicode Standard and on the ISO multibyte
.SM UTF-8
encoding (Universal Character
Set Transformation Format, 8 bits wide).
The Unicode Standard represents its characters in 16
bits;
.SM UTF-8
represents such
values in an 8-bit byte stream.
Throughout this manual,
.SM UTF-8
is shortened to
.SM UTF.
.PP
In Plan 9, a
.I rune
is a 16-bit quantity representing a Unicode character.
Internally, programs may store characters as runes.
However, any external manifestation of textual information,
in files or at the interface between programs, uses a
machine-independent, byte-stream encoding called
.SM UTF.
.PP
.SM UTF
is designed so that the values of the 7-bit
.SM ASCII
set (hexadecimal 00 to 7F)
appear only as themselves
in the encoding.
Runes with values above 7F appear as sequences of two or more
bytes with values only from 80 to FF.
.PP
The
.SM UTF
encoding of the Unicode Standard is backward compatible with
.SM ASCII\c
:
programs presented only with
.SM ASCII
work on Plan 9
even if not written to deal with
.SM UTF,
as do
programs that deal with uninterpreted byte streams.
However, programs that perform semantic processing on
.SM ASCII
graphic
characters must convert from
.SM UTF
to runes
in order to work properly with non-\c
.SM ASCII
input.
See
.IR rune (3).
.PP
Letting numbers be binary,
a rune x is converted to a multibyte
.SM UTF
sequence
as follows:
.PP
01. x in [00000000.0bbbbbbb] → 0bbbbbbb
.br
10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb
.br
11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb
.br
.PP
Conversion 01 provides a one-byte sequence that spans the
.SM ASCII
character set in a compatible way.
Conversions 10 and 11 represent higher-valued characters
as sequences of two or three bytes with the high bit set.
Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open.
When there are multiple ways to encode a value, for example rune 0,
the shortest encoding is used.
.PP
In the inverse mapping,
any sequence except those described above
is incorrect and is converted to rune hexadecimal 0080.
.SH "SEE ALSO"
.IR ascii (1),
.IR tcs (1),
.IR rune (3),
.IR "The Unicode Standard" .

51
src/libutf/utf.h Normal file
View file

@ -0,0 +1,51 @@
#ifndef _UTFH_
#define _UTFH_ 1
/*
 * Public interface of the UTF library: the Rune type, the
 * encoding limits, and prototypes for the library routines.
 */
typedef unsigned short Rune; /* one character value; 16 bits where unsigned short is 16 bits */
enum
{
UTFmax = 3, /* maximum bytes per rune */
Runesync = 0x80, /* values below this cannot be part of a multibyte UTF sequence */
Runeself = 0x80, /* values below this are encoded as themselves, in one byte */
Runeerror = 0x80, /* rune value reported for a decoding error in UTF */
};
/*
 * rune routines
 *
 * NOTE(review): only some of these are defined in the sources
 * reviewed with this header; notes on the others are hedged.
 */
/* conversion and length; callers advance a char* by chartorune's return value */
extern int runetochar(char*, Rune*);
extern int chartorune(Rune*, char*);
extern int runelen(long);
extern int runenlen(Rune*, int);
extern int fullrune(char*, int); /* presumably: is a whole sequence present in the given byte count? -- confirm */
/* routines on UTF-encoded char strings */
extern int utflen(char*);
extern int utfnlen(char*, long);
extern char* utfrune(char*, long);
extern char* utfrrune(char*, long);
extern char* utfutf(char*, char*);
extern char* utfecpy(char*, char*, char*); /* arguments are (to, end-of-buffer, from) */
/* routines on zero-terminated Rune strings */
extern Rune* runestrcat(Rune*, Rune*);
extern Rune* runestrchr(Rune*, Rune);
extern int runestrcmp(Rune*, Rune*);
extern Rune* runestrcpy(Rune*, Rune*);
extern Rune* runestrncpy(Rune*, Rune*, long);
extern Rune* runestrecpy(Rune*, Rune*, Rune*);
extern Rune* runestrdup(Rune*);
extern Rune* runestrncat(Rune*, Rune*, long);
extern int runestrncmp(Rune*, Rune*, long);
extern Rune* runestrrchr(Rune*, Rune);
extern long runestrlen(Rune*);
extern Rune* runestrstr(Rune*, Rune*);
/* case mapping and classification, judging by the names -- definitions not visible here */
extern Rune tolowerrune(Rune);
extern Rune totitlerune(Rune);
extern Rune toupperrune(Rune);
extern int isalpharune(Rune);
extern int islowerrune(Rune);
extern int isspacerune(Rune);
extern int istitlerune(Rune);
extern int isupperrune(Rune);
#endif

14
src/libutf/utfdef.h Normal file
View file

@ -0,0 +1,14 @@
/*
 * Private definitions included by the library's .c files.
 * The short type names are first #defined to _utf-prefixed names,
 * so the typedefs below really declare _utfuchar, _utfushort, ...
 * Presumably this avoids clashes with system headers that already
 * define uchar, ushort, etc. -- TODO confirm.
 */
#define uchar _utfuchar
#define ushort _utfushort
#define uint _utfuint
#define ulong _utfulong
#define vlong _utfvlong
#define uvlong _utfuvlong
/* NOTE(review): vlong and uvlong are renamed above but have no typedef in this header. */
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
/* number of elements in the array x */
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
/* null pointer constant */
#define nil ((void*)0)

36
src/libutf/utfecpy.c Normal file
View file

@ -0,0 +1,36 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
char*
utfecpy(char *to, char *e, char *from)
{
char *end;
if(to >= e)
return to;
end = memccpy(to, from, '\0', e - to);
if(end == nil){
end = e-1;
while(end>to && (*--end&0xC0)==0x80)
;
*end = '\0';
}else{
end--;
}
return end;
}

38
src/libutf/utflen.c Normal file
View file

@ -0,0 +1,38 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Count the runes in the NUL-terminated UTF string s.
 * Bytes below Runeself count as one rune each; anything else is
 * decoded with chartorune, whose return value is the number of
 * bytes to skip.
 */
int
utflen(char *s)
{
	long count;
	Rune r;
	int b;

	for(count = 0; (b = *(uchar*)s) != 0; count++) {
		if(b < Runeself)
			s++;
		else
			s += chartorune(&r, s);
	}
	return count;
}

41
src/libutf/utfnlen.c Normal file
View file

@ -0,0 +1,41 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Count the runes in the first m bytes of the UTF string s.
 * Counting stops early at a NUL byte, or at a multibyte sequence
 * for which fullrune reports false on the bytes remaining before
 * s+m.
 */
int
utfnlen(char *s, long m)
{
	char *limit;
	long count;
	int b;
	Rune r;

	limit = s + m;
	count = 0;
	while(s < limit) {
		b = *(uchar*)s;
		if(b == '\0')
			break;
		if(b < Runeself)
			s++;			/* single-byte rune */
		else if(fullrune(s, limit-s))
			s += chartorune(&r, s);
		else
			break;
		count++;
	}
	return count;
}

46
src/libutf/utfrrune.c Normal file
View file

@ -0,0 +1,46 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return a pointer to the last occurrence of rune c in the
 * NUL-terminated UTF string s, or 0 if there is none.
 * Values below Runesync are looked up with strrchr.
 */
char*
utfrrune(char *s, long c)
{
	char *last;
	long b;
	int width;
	Rune r;

	if(c < Runesync)		/* not part of utf sequence */
		return strrchr(s, c);
	last = 0;
	while((b = *(uchar*)s) != 0) {
		if(b < Runeself) {	/* one byte rune */
			if(b == c)
				last = s;
			s++;
		} else {
			width = chartorune(&r, s);
			if(r == c)
				last = s;
			s += width;
		}
	}
	return last;
}

45
src/libutf/utfrune.c Normal file
View file

@ -0,0 +1,45 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Return a pointer to the first occurrence of rune c in the
 * NUL-terminated UTF string s, or 0 if there is none.
 * Values below Runesync are looked up with strchr.
 */
char*
utfrune(char *s, long c)
{
	long b;
	int width;
	Rune r;

	if(c < Runesync)		/* not part of utf sequence */
		return strchr(s, c);
	while((b = *(uchar*)s) != 0) {
		if(b < Runeself) {	/* one byte rune */
			if(b == c)
				return s;
			s++;
		} else {
			width = chartorune(&r, s);
			if(r == c)
				return s;
			s += width;
		}
	}
	return 0;
}

41
src/libutf/utfutf.c Normal file
View file

@ -0,0 +1,41 @@
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "utf.h"
#include "utfdef.h"
/*
 * Find the first occurrence of the UTF string s2 inside s1.
 * Returns a pointer to the match, or 0 if there is none.
 * When the first rune of s2 is at most Runesync the search is
 * handed to strstr; otherwise each position where utfrune finds
 * that rune is compared against s2 byte for byte.
 */
char*
utfutf(char *s1, char *s2)
{
	char *hit;
	long first, width, len;
	Rune r;

	width = chartorune(&r, s2);
	first = r;
	if(first <= Runesync)		/* represents self */
		return strstr(s1, s2);
	len = strlen(s2);
	hit = utfrune(s1, first);
	while(hit != 0) {
		if(strncmp(hit, s2, len) == 0)
			return hit;
		/* skip the rune just matched and look for the next one */
		hit = utfrune(hit + width, first);
	}
	return 0;
}