checkpoint
This commit is contained in:
parent
2634795b5f
commit
78e51a8c66
314 changed files with 48199 additions and 300 deletions
1
unix/man/.cvsignore
Normal file
1
unix/man/.cvsignore
Normal file
|
|
@ -0,0 +1 @@
|
|||
bio3.html fmtinstall3.html fmtstrtod3.html index.html isalpharune3.html mk1.html print3.html quote3.html regexp93.html regexp97.html rune3.html runestrcat3.html utf7.html
|
||||
363
unix/man/bio.3
Normal file
363
unix/man/bio.3
Normal file
|
|
@ -0,0 +1,363 @@
|
|||
.TH BIO 3
|
||||
.SH NAME
|
||||
Bopen, Bfdopen, Binit, Binits, Brdline, Brdstr, Bgetc, Bgetrune, Bgetd, Bungetc, Bungetrune, Bread, Bseek, Boffset, Bfildes, Blinelen, Bputc, Bputrune, Bprint, Bvprint, Bwrite, Bflush, Bterm, Bbuffered \- buffered input/output
|
||||
.SH SYNOPSIS
|
||||
.ta \w'\fLBiobuf* 'u
|
||||
.B #include <utf.h>
|
||||
.br
|
||||
.B #include <fmt.h>
|
||||
.br
|
||||
.B #include <bio.h>
|
||||
.PP
|
||||
.B
|
||||
Biobuf* Bopen(char *file, int mode)
|
||||
.PP
|
||||
.B
|
||||
Biobuf* Bfdopen(int fd, int mode)
|
||||
.PP
|
||||
.B
|
||||
int Binit(Biobuf *bp, int fd, int mode)
|
||||
.PP
|
||||
.B
|
||||
int Binits(Biobufhdr *bp, int fd, int mode, uchar *buf, int size)
|
||||
.PP
|
||||
.B
|
||||
int Bterm(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bprint(Biobufhdr *bp, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int Bvprint(Biobufhdr *bp, char *format, va_list arglist);
|
||||
.PP
|
||||
.B
|
||||
void* Brdline(Biobufhdr *bp, int delim)
|
||||
.PP
|
||||
.B
|
||||
char* Brdstr(Biobufhdr *bp, int delim, int nulldelim)
|
||||
.PP
|
||||
.B
|
||||
int Blinelen(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
vlong Boffset(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bfildes(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bgetc(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
long Bgetrune(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bgetd(Biobufhdr *bp, double *d)
|
||||
.PP
|
||||
.B
|
||||
int Bungetc(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bungetrune(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
vlong Bseek(Biobufhdr *bp, vlong n, int type)
|
||||
.PP
|
||||
.B
|
||||
int Bputc(Biobufhdr *bp, int c)
|
||||
.PP
|
||||
.B
|
||||
int Bputrune(Biobufhdr *bp, long c)
|
||||
.PP
|
||||
.B
|
||||
long Bread(Biobufhdr *bp, void *addr, long nbytes)
|
||||
.PP
|
||||
.B
|
||||
long Bwrite(Biobufhdr *bp, void *addr, long nbytes)
|
||||
.PP
|
||||
.B
|
||||
int Bflush(Biobufhdr *bp)
|
||||
.PP
|
||||
.B
|
||||
int Bbuffered(Biobufhdr *bp)
|
||||
.PP
|
||||
.SH DESCRIPTION
|
||||
These routines implement fast buffered I/O.
|
||||
I/O on different file descriptors is independent.
|
||||
.PP
|
||||
.I Bopen
|
||||
opens
|
||||
.I file
|
||||
for mode
|
||||
.B O_RDONLY
|
||||
or creates for mode
|
||||
.BR O_WRONLY .
|
||||
It calls
|
||||
.IR malloc (3)
|
||||
to allocate a buffer.
|
||||
.PP
|
||||
.I Bfdopen
|
||||
allocates a buffer for the already-open file descriptor
|
||||
.I fd
|
||||
for mode
|
||||
.B O_RDONLY
|
||||
or
|
||||
.BR O_WRONLY .
|
||||
It calls
|
||||
.IR malloc (3)
|
||||
to allocate a buffer.
|
||||
.PP
|
||||
.I Binit
|
||||
initializes a standard size buffer, type
|
||||
.IR Biobuf ,
|
||||
with the open file descriptor passed in
|
||||
by the user.
|
||||
.I Binits
|
||||
initializes a non-standard size buffer, type
|
||||
.IR Biobufhdr ,
|
||||
with the open file descriptor,
|
||||
buffer area, and buffer size passed in
|
||||
by the user.
|
||||
.I Biobuf
|
||||
and
|
||||
.I Biobufhdr
|
||||
are related by the declaration:
|
||||
.IP
|
||||
.EX
|
||||
typedef struct Biobuf Biobuf;
|
||||
struct Biobuf
|
||||
{
|
||||
Biobufhdr;
|
||||
uchar b[Bungetsize+Bsize];
|
||||
};
|
||||
.EE
|
||||
.PP
|
||||
Arguments
|
||||
of types pointer to Biobuf and pointer to Biobufhdr
|
||||
can be used interchangeably in the following routines.
|
||||
.PP
|
||||
.IR Bopen ,
|
||||
.IR Binit ,
|
||||
or
|
||||
.I Binits
|
||||
should be called before any of the
|
||||
other routines on that buffer.
|
||||
.I Bfildes
|
||||
returns the integer file descriptor of the associated open file.
|
||||
.PP
|
||||
.I Bterm
|
||||
flushes the buffer for
|
||||
.IR bp .
|
||||
If the buffer was allocated by
|
||||
.IR Bopen ,
|
||||
the buffer is
|
||||
.I freed
|
||||
and the file is closed.
|
||||
.PP
|
||||
.I Brdline
|
||||
reads a string from the file associated with
|
||||
.I bp
|
||||
up to and including the first
|
||||
.I delim
|
||||
character.
|
||||
The delimiter character at the end of the line is
|
||||
not altered.
|
||||
.I Brdline
|
||||
returns a pointer to the start of the line or
|
||||
.L 0
|
||||
on end-of-file or read error.
|
||||
.I Blinelen
|
||||
returns the length (including the delimiter)
|
||||
of the most recent string returned by
|
||||
.IR Brdline .
|
||||
.PP
|
||||
.I Brdstr
|
||||
returns a
|
||||
.IR malloc (3)-allocated
|
||||
buffer containing the next line of input delimited by
|
||||
.IR delim ,
|
||||
terminated by a NUL (0) byte.
|
||||
Unlike
|
||||
.IR Brdline ,
|
||||
which returns when its buffer is full even if no delimiter has been found,
|
||||
.I Brdstr
|
||||
will return an arbitrarily long line in a single call.
|
||||
If
|
||||
.I nulldelim
|
||||
is set, the terminal delimiter will be overwritten with a NUL.
|
||||
After a successful call to
|
||||
.IR Brdstr ,
|
||||
the return value of
|
||||
.I Blinelen
|
||||
will be the length of the returned buffer, excluding the NUL.
|
||||
.PP
|
||||
.I Bgetc
|
||||
returns the next character from
|
||||
.IR bp ,
|
||||
or a negative value
|
||||
at end of file.
|
||||
.I Bungetc
|
||||
may be called immediately after
|
||||
.I Bgetc
|
||||
to allow the same character to be reread.
|
||||
.PP
|
||||
.I Bgetrune
|
||||
calls
|
||||
.I Bgetc
|
||||
to read the bytes of the next
|
||||
.SM UTF
|
||||
sequence in the input stream and returns the value of the rune
|
||||
represented by the sequence.
|
||||
It returns a negative value
|
||||
at end of file.
|
||||
.I Bungetrune
|
||||
may be called immediately after
|
||||
.I Bgetrune
|
||||
to allow the same
|
||||
.SM UTF
|
||||
sequence to be reread as either bytes or a rune.
|
||||
.I Bungetc
|
||||
and
|
||||
.I Bungetrune
|
||||
may back up a maximum of five bytes.
|
||||
.PP
|
||||
.I Bgetd
|
||||
uses
|
||||
.I fmtcharstod
|
||||
(see
|
||||
.IR fmtstrtod (3))
|
||||
and
|
||||
.I Bgetc
|
||||
to read the formatted
|
||||
floating-point number in the input stream,
|
||||
skipping initial blanks and tabs.
|
||||
The value is stored in
|
||||
.BR *d .
|
||||
.PP
|
||||
.I Bread
|
||||
reads
|
||||
.I nbytes
|
||||
of data from
|
||||
.I bp
|
||||
into memory starting at
|
||||
.IR addr .
|
||||
The number of bytes read is returned on success
|
||||
and a negative value is returned if a read error occurred.
|
||||
.PP
|
||||
.I Bseek
|
||||
applies
|
||||
.IR lseek (2)
|
||||
to
|
||||
.IR bp .
|
||||
It returns the new file offset.
|
||||
.I Boffset
|
||||
returns the file offset of the next character to be processed.
|
||||
.PP
|
||||
.I Bputc
|
||||
outputs the low order 8 bits of
|
||||
.I c
|
||||
on
|
||||
.IR bp .
|
||||
If this causes a
|
||||
.IR write
|
||||
to occur and there is an error,
|
||||
a negative value is returned.
|
||||
Otherwise, a zero is returned.
|
||||
.PP
|
||||
.I Bputrune
|
||||
calls
|
||||
.I Bputc
|
||||
to output the low order
|
||||
16 bits of
|
||||
.I c
|
||||
as a rune
|
||||
in
|
||||
.SM UTF
|
||||
format
|
||||
on the output stream.
|
||||
.PP
|
||||
.I Bprint
|
||||
is a buffered interface to
|
||||
.IR print (3).
|
||||
If this causes a
|
||||
.IR write
|
||||
to occur and there is an error,
|
||||
a negative value
|
||||
.RB ( Beof )
|
||||
is returned.
|
||||
Otherwise, the number of bytes output is returned.
|
||||
.I Bvprint
|
||||
does the same except it takes as argument a
|
||||
.B va_list
|
||||
parameter, so it can be called within a variadic function.
|
||||
.PP
|
||||
.I Bwrite
|
||||
outputs
|
||||
.I nbytes
|
||||
of data starting at
|
||||
.I addr
|
||||
to
|
||||
.IR bp .
|
||||
If this causes a
|
||||
.IR write
|
||||
to occur and there is an error,
|
||||
a negative value is returned.
|
||||
Otherwise, the number of bytes written is returned.
|
||||
.PP
|
||||
.I Bflush
|
||||
causes any buffered output associated with
|
||||
.I bp
|
||||
to be written.
|
||||
The return is as for
|
||||
.IR Bputc .
|
||||
.I Bflush
|
||||
is called on
|
||||
exit for every buffer still open
|
||||
for writing.
|
||||
.PP
|
||||
.I Bbuffered
|
||||
returns the number of bytes in the buffer.
|
||||
When reading, this is the number of bytes still available from the last
|
||||
read on the file; when writing, it is the number of bytes ready to be
|
||||
written.
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR open (2),
|
||||
.IR print (3),
|
||||
.IR atexit (3),
|
||||
.IR utf (7)
|
||||
.SH DIAGNOSTICS
|
||||
.I Bio
|
||||
routines that return integers yield
|
||||
.B Beof
|
||||
if
|
||||
.I bp
|
||||
is not the descriptor of an open file.
|
||||
.I Bopen
|
||||
returns zero if the file cannot be opened in the given mode.
|
||||
All routines set
|
||||
.I errstr
|
||||
on error.
|
||||
.SH BUGS
|
||||
.I Brdline
|
||||
returns an error on strings longer than the buffer associated
|
||||
with the file
|
||||
and also if the end-of-file is encountered
|
||||
before a delimiter.
|
||||
.I Blinelen
|
||||
will tell how many characters are available
|
||||
in these cases.
|
||||
In the case of a true end-of-file,
|
||||
.I Blinelen
|
||||
will return zero.
|
||||
At the cost of allocating a buffer,
|
||||
.I Brdstr
|
||||
sidesteps these issues.
|
||||
.PP
|
||||
The data returned by
|
||||
.I Brdline
|
||||
may be overwritten by calls to any other
|
||||
.I bio
|
||||
routine on the same
|
||||
.IR bp .
|
||||
8
unix/man/ex.man
Normal file
8
unix/man/ex.man
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
.deEX
|
||||
.ift .ft5
|
||||
.nf
|
||||
..
|
||||
.deEE
|
||||
.ft1
|
||||
.fi
|
||||
..
|
||||
34
unix/man/fixurls
Executable file
34
unix/man/fixurls
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Build @omit, a list of "name section" strings (e.g. "grep 1") naming manual
# pages whose hyperlinks are stripped from the HTML files processed below.
# The first batch comes from the Omitman["name(section)"] entries found in
# checkman.awk, extracted by the sed command (run through "9") into
# "name section" lines.
open(OMIT, "9 sed -n 's/.*Omitman\\[\"(.*)\\((.)\\)\".*/\\1 \\2/p' /usr/local/plan9/dist/checkman.awk |") || die "omit: $!";
|
||||
# Read every line the pipeline produced, one omitted page per line.
@omit = <OMIT>;
|
||||
close OMIT;
|
||||
# Drop the trailing newlines so each entry is exactly "name section".
chomp @omit;
|
||||
# Extra pages added by hand, in the same "name section" form.
push @omit, "grep 1", "lseek 2", "tcs 1", "sed 1", "rc 1", "strcat 3", "yacc 1";
|
||||
|
||||
# noref(page, section): remove hyperlinks to one manual page.
# Operates on the global $text: every <a href="../man<section>/<page>.html">
# ... </a> element is replaced by its link text alone, so the reference stays
# readable but is no longer a link.  Nothing meaningful is returned.
sub noref {
|
||||
# $p is the page name, $s its section.
my ($p, $s) = @_;
|
||||
|
||||
# The capture matches the link body up to (but not across) a closing </a>.
# NOTE(review): $p and $s are interpolated into the pattern unescaped, and
# "\1" in the replacement is the older spelling of "$1" -- confirm whether
# \Q...\E quoting and $1 are wanted here.
$text =~ s!<a href="../man$s/$p.html">(([^<]|<[^/]|</[^a])*)</a>!\1!g;
|
||||
}
|
||||
|
||||
# Rewrite, in place, each file named on the command line.
for($i=0; $i<@ARGV; $i++){
|
||||
open(IN, $ARGV[$i]) || die "open $ARGV[$i]: $!";
|
||||
@text = <IN>;
|
||||
close IN;
|
||||
# Work on the whole file as one string so the substitutions can span lines.
$text = join("", @text);
|
||||
|
||||
# Strip the links to every page listed in @omit.
foreach $o (@omit) {
|
||||
# Split the "name section" entry into $1 (name) and $2 (section).
$o =~ /(.*) (.*)/;
|
||||
noref($1, $2);
|
||||
}
|
||||
|
||||
# Turn the remaining ../man<section>/<name>.html references into the flat
# form <name><section>.html (e.g. ../man3/print.html becomes print3.html).
$text =~ s!../man(.)/([^.]*)\.html!$2$1.html!g;
|
||||
# Make the source URL a clickable link.
$text =~ s!(http://swtch.com/plan9port/unix)!<a href="\1">\1</a>!g;
|
||||
|
||||
# Overwrite the input file with the rewritten text.
open(OUT, ">$ARGV[$i]") || die "open $ARGV[$i]: $!";
|
||||
print OUT $text;
|
||||
close OUT;
|
||||
}
|
||||
|
||||
exit 0;
|
||||
371
unix/man/fmtinstall.3
Normal file
371
unix/man/fmtinstall.3
Normal file
|
|
@ -0,0 +1,371 @@
|
|||
.TH FMTINSTALL 3
|
||||
.SH NAME
|
||||
fmtinstall, dofmt, dorfmt, fmtprint, fmtvprint, fmtrune, fmtstrcpy, fmtrunestrcpy, fmtfdinit, fmtfdflush, fmtstrinit, fmtstrflush, runefmtstrinit, runefmtstrflush, errfmt \- support for user-defined print formats and output routines
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.br
|
||||
.B #include <fmt.h>
|
||||
.PP
|
||||
.ft L
|
||||
.nf
|
||||
.ta \w' 'u +\w' 'u +\w' 'u +\w' 'u +\w' 'u
|
||||
typedef struct Fmt Fmt;
|
||||
struct Fmt{
|
||||
uchar runes; /* output buffer is runes or chars? */
|
||||
void *start; /* of buffer */
|
||||
void *to; /* current place in the buffer */
|
||||
void *stop; /* end of the buffer; overwritten if flush fails */
|
||||
int (*flush)(Fmt*); /* called when to == stop */
|
||||
void *farg; /* to make flush a closure */
|
||||
int nfmt; /* num chars formatted so far */
|
||||
va_list args; /* args passed to dofmt */
|
||||
int r; /* % format Rune */
|
||||
int width;
|
||||
int prec;
|
||||
ulong flags;
|
||||
};
|
||||
|
||||
enum{
|
||||
FmtWidth = 1,
|
||||
FmtLeft = FmtWidth << 1,
|
||||
FmtPrec = FmtLeft << 1,
|
||||
FmtSharp = FmtPrec << 1,
|
||||
FmtSpace = FmtSharp << 1,
|
||||
FmtSign = FmtSpace << 1,
|
||||
FmtZero = FmtSign << 1,
|
||||
FmtUnsigned = FmtZero << 1,
|
||||
FmtShort = FmtUnsigned << 1,
|
||||
FmtLong = FmtShort << 1,
|
||||
FmtVLong = FmtLong << 1,
|
||||
FmtComma = FmtVLong << 1,
|
||||
|
||||
FmtFlag = FmtComma << 1
|
||||
};
|
||||
.fi
|
||||
.PP
|
||||
.B
|
||||
.ta \w'\fLchar* 'u
|
||||
|
||||
.PP
|
||||
.B
|
||||
int fmtfdinit(Fmt *f, int fd, char *buf, int nbuf);
|
||||
.PP
|
||||
.B
|
||||
int fmtfdflush(Fmt *f);
|
||||
.PP
|
||||
.B
|
||||
int fmtstrinit(Fmt *f);
|
||||
.PP
|
||||
.B
|
||||
char* fmtstrflush(Fmt *f);
|
||||
.PP
|
||||
.B
|
||||
int runefmtstrinit(Fmt *f);
|
||||
.PP
|
||||
.B
|
||||
Rune* runefmtstrflush(Fmt *f);
|
||||
|
||||
.PP
|
||||
.B
|
||||
int fmtinstall(int c, int (*fn)(Fmt*));
|
||||
.PP
|
||||
.B
|
||||
int dofmt(Fmt *f, char *fmt);
|
||||
.PP
|
||||
.B
|
||||
int dorfmt(Fmt*, Rune *fmt);
|
||||
.PP
|
||||
.B
|
||||
int fmtprint(Fmt *f, char *fmt, ...);
|
||||
.PP
|
||||
.B
|
||||
int fmtvprint(Fmt *f, char *fmt, va_list v);
|
||||
.PP
|
||||
.B
|
||||
int fmtrune(Fmt *f, int r);
|
||||
.PP
|
||||
.B
|
||||
int fmtstrcpy(Fmt *f, char *s);
|
||||
.PP
|
||||
.B
|
||||
int fmtrunestrcpy(Fmt *f, Rune *s);
|
||||
.PP
|
||||
.B
|
||||
int errfmt(Fmt *f);
|
||||
.SH DESCRIPTION
|
||||
The interface described here allows the construction of custom
|
||||
.IR print (3)
|
||||
verbs and output routines.
|
||||
In essence, they provide access to the workings of the formatted print code.
|
||||
.PP
|
||||
The
|
||||
.IR print (3)
|
||||
suite maintains its state with a data structure called
|
||||
.BR Fmt .
|
||||
A typical call to
|
||||
.IR print (3)
|
||||
or its relatives initializes a
|
||||
.B Fmt
|
||||
structure, passes it to subsidiary routines to process the output,
|
||||
and finishes by emitting any saved state recorded in the
|
||||
.BR Fmt .
|
||||
The details of the
|
||||
.B Fmt
|
||||
are unimportant to outside users, except insofar as the general
|
||||
design influences the interface.
|
||||
The
|
||||
.B Fmt
|
||||
records whether the output is in runes or bytes,
|
||||
the verb being processed, its precision and width,
|
||||
and buffering parameters.
|
||||
Most important, it also records a
|
||||
.I flush
|
||||
routine that the library will call if a buffer overflows.
|
||||
When printing to a file descriptor, the flush routine will
|
||||
emit saved characters and reset the buffer; when printing
|
||||
to an allocated string, it will resize the string to receive more output.
|
||||
The flush routine is nil when printing to fixed-size buffers.
|
||||
User code need never provide a flush routine; this is done internally
|
||||
by the library.
|
||||
.SS Custom output routines
|
||||
To write a custom output routine, such as an error handler that
|
||||
formats and prints custom error messages, the output sequence can be run
|
||||
from outside the library using the routines described here.
|
||||
There are two main cases: output to an open file descriptor
|
||||
and output to a string.
|
||||
.PP
|
||||
To write to a file descriptor, call
|
||||
.I fmtfdinit
|
||||
to initialize the local
|
||||
.B Fmt
|
||||
structure
|
||||
.IR f ,
|
||||
giving the file descriptor
|
||||
.IR fd ,
|
||||
the buffer
|
||||
.IR buf ,
|
||||
and its size
|
||||
.IR nbuf .
|
||||
Then call
|
||||
.IR fmtprint
|
||||
or
|
||||
.IR fmtvprint
|
||||
to generate the output.
|
||||
These behave like
|
||||
.B fprint
|
||||
(see
|
||||
.IR print (3))
|
||||
or
|
||||
.B vfprint
|
||||
except that the characters are buffered until
|
||||
.I fmtfdflush
|
||||
is called and the return value is either 0 or \-1.
|
||||
A typical example of this sequence appears in the Examples section.
|
||||
.PP
|
||||
The same basic sequence applies when outputting to an allocated string:
|
||||
call
|
||||
.I fmtstrinit
|
||||
to initialize the
|
||||
.BR Fmt ,
|
||||
then call
|
||||
.I fmtprint
|
||||
and
|
||||
.I fmtvprint
|
||||
to generate the output.
|
||||
Finally,
|
||||
.I fmtstrflush
|
||||
will return the allocated string, which should be freed after use.
|
||||
To output to a rune string, use
|
||||
.I runefmtstrinit
|
||||
and
|
||||
.IR runefmtstrflush .
|
||||
Regardless of the output style or type,
|
||||
.I fmtprint
|
||||
or
|
||||
.I fmtvprint
|
||||
generates the characters.
|
||||
.SS Custom format verbs
|
||||
.I Fmtinstall
|
||||
is used to install custom verbs and flags labeled by character
|
||||
.IR c ,
|
||||
which may be any non-zero Unicode character.
|
||||
.I Fn
|
||||
should be declared as
|
||||
.IP
|
||||
.EX
|
||||
int fn(Fmt*)
|
||||
.EE
|
||||
.PP
|
||||
.IB Fp ->r
|
||||
is the flag or verb character to cause
|
||||
.I fn
|
||||
to be called.
|
||||
In
|
||||
.IR fn ,
|
||||
.IB fp ->width ,
|
||||
.IB fp ->prec
|
||||
are the width and precision, and
|
||||
.IB fp ->flags
|
||||
the decoded flags for the verb (see
|
||||
.IR print (3)
|
||||
for a description of these items).
|
||||
The standard flag values are:
|
||||
.B FmtSign
|
||||
.RB ( + ),
|
||||
.B FmtLeft
|
||||
.RB ( - ),
|
||||
.B FmtSpace
|
||||
.RB ( '\ ' ),
|
||||
.B FmtSharp
|
||||
.RB ( # ),
|
||||
.B FmtComma
|
||||
.RB ( , ),
|
||||
.B FmtLong
|
||||
.RB ( l ),
|
||||
.B FmtShort
|
||||
.RB ( h ),
|
||||
.B FmtUnsigned
|
||||
.RB ( u ),
|
||||
and
|
||||
.B FmtVLong
|
||||
.RB ( ll ).
|
||||
The flag bits
|
||||
.B FmtWidth
|
||||
and
|
||||
.B FmtPrec
|
||||
identify whether a width and precision were specified.
|
||||
.PP
|
||||
.I Fn
|
||||
is passed a pointer to the
|
||||
.B Fmt
|
||||
structure recording the state of the output.
|
||||
If
|
||||
.IB fp ->r
|
||||
is a verb (rather than a flag),
|
||||
.I fn
|
||||
should use
|
||||
.IB fp ->args
|
||||
to fetch its argument from the list,
|
||||
then format it, and return zero.
|
||||
If
|
||||
.IB fp ->r
|
||||
is a flag,
|
||||
.I fn
|
||||
should return one.
|
||||
All interpretation of
|
||||
.IB fp ->width\f1,
|
||||
.IB fp ->prec\f1,
|
||||
and
|
||||
.IB fp ->flags
|
||||
is left up to the conversion routine.
|
||||
.I Fmtinstall
|
||||
returns 0 if the installation succeeds, \-1 if it fails.
|
||||
.PP
|
||||
.IR Fmtprint
|
||||
and
|
||||
.IR fmtvprint
|
||||
may be called to
|
||||
help prepare output in custom conversion routines.
|
||||
However, these functions clear the width, precision, and flags.
|
||||
Both functions return 0 for success and \-1 for failure.
|
||||
.PP
|
||||
The functions
|
||||
.I dofmt
|
||||
and
|
||||
.I dorfmt
|
||||
are the underlying formatters; they
|
||||
use the existing contents of
|
||||
.B Fmt
|
||||
and should be called only by sophisticated conversion routines.
|
||||
These routines return the number of characters (bytes of UTF or runes)
|
||||
produced.
|
||||
.PP
|
||||
Some internal functions may be useful to format primitive types.
|
||||
They honor the width, precision and flags as described in
|
||||
.IR print (3).
|
||||
.I Fmtrune
|
||||
formats a single character
|
||||
.BR r .
|
||||
.I Fmtstrcpy
|
||||
formats a string
|
||||
.BR s ;
|
||||
.I fmtrunestrcpy
|
||||
formats a rune string
|
||||
.BR s .
|
||||
.I Errfmt
|
||||
formats the system error string.
|
||||
All these routines return zero for successful execution.
|
||||
Conversion routines that call these functions will work properly
|
||||
regardless of whether the output is bytes or runes.
|
||||
.\" .PP
|
||||
.\" .IR 2c (1)
|
||||
.\" describes the C directive
|
||||
.\" .B #pragma
|
||||
.\" .B varargck
|
||||
.\" that can be used to provide type-checking for custom print verbs and output routines.
|
||||
.SH EXAMPLES
|
||||
This function prints an error message with a variable
|
||||
number of arguments and then quits.
|
||||
Compared to the corresponding example in
|
||||
.IR print (3),
|
||||
this version uses a smaller buffer, will never truncate
|
||||
the output message, but might generate multiple
|
||||
.B write
|
||||
system calls to produce its output.
|
||||
.IP
|
||||
.EX
|
||||
.ta 6n +6n +6n +6n +6n +6n +6n +6n +6n
|
||||
#pragma varargck argpos fatal 1
|
||||
|
||||
void fatal(char *fmt, ...)
|
||||
{
|
||||
Fmt f;
|
||||
char buf[64];
|
||||
va_list arg;
|
||||
|
||||
fmtfdinit(&f, 1, buf, sizeof buf);
|
||||
fmtprint(&f, "fatal: ");
|
||||
va_start(arg, fmt);
|
||||
fmtvprint(&f, fmt, arg);
|
||||
va_end(arg);
|
||||
fmtprint(&f, "\en");
|
||||
fmtfdflush(&f);
|
||||
exits("fatal error");
|
||||
}
|
||||
.EE
|
||||
.PP
|
||||
This example adds a verb to print complex numbers.
|
||||
.IP
|
||||
.EX
|
||||
typedef
|
||||
struct {
|
||||
double r, i;
|
||||
} Complex;
|
||||
|
||||
#pragma varargck type "X" Complex
|
||||
|
||||
int
|
||||
Xfmt(Fmt *f)
|
||||
{
|
||||
Complex c;
|
||||
|
||||
c = va_arg(f->args, Complex);
|
||||
return fmtprint(f, "(%g,%g)", c.r, c.i);
|
||||
}
|
||||
|
||||
main(...)
|
||||
{
|
||||
Complex x = (Complex){ 1.5, -2.3 };
|
||||
|
||||
fmtinstall('X', Xfmt);
|
||||
print("x = %X\en", x);
|
||||
}
|
||||
.EE
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR print (3),
|
||||
.IR utf (7)
|
||||
.SH DIAGNOSTICS
|
||||
These routines return negative numbers or nil for errors and set
|
||||
.IR errstr .
|
||||
54
unix/man/fmtstrtod.3
Normal file
54
unix/man/fmtstrtod.3
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
.TH FMTSTRTOD 3
|
||||
.SH NAME
|
||||
fmtstrtod, fmtcharstod \- convert text to numbers
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.br
|
||||
.B #include <fmt.h>
|
||||
.PP
|
||||
.PP
|
||||
.B
|
||||
double fmtstrtod(char *nptr, char **rptr)
|
||||
.PP
|
||||
.B
|
||||
double fmtcharstod(int (*f)(void *), void *a)
|
||||
.SH DESCRIPTION
|
||||
.I Fmtstrtod
|
||||
converts a string pointed to by
|
||||
.I nptr
|
||||
to floating point representation and, if
|
||||
.I rptr
|
||||
is not zero, sets
|
||||
.I *rptr
|
||||
to point to the input character immediately after the string converted.
|
||||
.I Fmtstrtod
|
||||
recognizes an optional string of tabs and spaces,
|
||||
then an optional sign, then a string of digits optionally
|
||||
containing a decimal point, then an optional
|
||||
.L e
|
||||
or
|
||||
.L E
|
||||
followed by an optionally signed integer.
|
||||
.PP
|
||||
.PP
|
||||
.I Fmtcharstod
|
||||
interprets floating point numbers in the manner of
|
||||
.IR atof ,
|
||||
but gets successive characters by calling
|
||||
.BR (*\fIf\fP)(a) .
|
||||
The last call to
|
||||
.I f
|
||||
terminates the scan, so it must have returned a character that
|
||||
is not a legal continuation of a number.
|
||||
Therefore, it may be necessary to back up the input stream one character
|
||||
after calling
|
||||
.IR fmtcharstod .
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR fscanf (3)
|
||||
.SH DIAGNOSTICS
|
||||
Zero is returned if the beginning of the input string is not interpretable
|
||||
as a number; even in this case,
|
||||
.I rptr
|
||||
will be updated.
|
||||
9
unix/man/index.html
Normal file
9
unix/man/index.html
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta http-equiv="refresh" content="0; URL=..">
|
||||
<title>you're lost!</title>
|
||||
</head>
|
||||
<body>
|
||||
Please go <a href="..">here</a>.
|
||||
</body>
|
||||
</html>
|
||||
49
unix/man/isalpharune.3
Normal file
49
unix/man/isalpharune.3
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
.TH ISALPHARUNE 3
|
||||
.SH NAME
|
||||
isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.PP
|
||||
.B
|
||||
int isalpharune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
int islowerrune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
int isspacerune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
int istitlerune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
int isupperrune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
Rune tolowerrune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
Rune totitlerune(Rune c)
|
||||
.PP
|
||||
.B
|
||||
Rune toupperrune(Rune c)
|
||||
.SH DESCRIPTION
|
||||
These routines examine and operate on Unicode characters,
|
||||
in particular a subset of their properties as defined in the Unicode standard.
|
||||
Unicode defines some characters as alphabetic and specifies three cases:
|
||||
upper, lower, and title.
|
||||
Analogously to
|
||||
.IR isalpha (3)
|
||||
for
|
||||
.SM ASCII\c
|
||||
,
|
||||
these routines
|
||||
test types and modify cases for Unicode characters.
|
||||
The names are self-explanatory.
|
||||
.PP
|
||||
The case-conversion routines return the character unchanged if it has no case.
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH "SEE ALSO"
|
||||
.IR isalpha (3),
|
||||
.IR "The Unicode Standard" .
|
||||
684
unix/man/mk.1
Normal file
684
unix/man/mk.1
Normal file
|
|
@ -0,0 +1,684 @@
|
|||
.TH MK 1
|
||||
.SH NAME
|
||||
mk \- maintain (make) related files
|
||||
.SH SYNOPSIS
|
||||
.B mk
|
||||
[
|
||||
.B -f
|
||||
.I mkfile
|
||||
] ...
|
||||
[
|
||||
.I option ...
|
||||
]
|
||||
[
|
||||
.I target ...
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.I Mk
|
||||
uses the dependency rules specified in
|
||||
.I mkfile
|
||||
to control the update (usually by compilation) of
|
||||
.I targets
|
||||
(usually files)
|
||||
from the source files upon which they depend.
|
||||
The
|
||||
.I mkfile
|
||||
(default
|
||||
.LR mkfile )
|
||||
contains a
|
||||
.I rule
|
||||
for each target that identifies the files and other
|
||||
targets upon which it depends and an
|
||||
.IR sh (1)
|
||||
script, a
|
||||
.IR recipe ,
|
||||
to update the target.
|
||||
The script is run if the target does not exist
|
||||
or if it is older than any of the files it depends on.
|
||||
.I Mkfile
|
||||
may also contain
|
||||
.I meta-rules
|
||||
that define actions for updating implicit targets.
|
||||
If no
|
||||
.I target
|
||||
is specified, the target of the first rule (not meta-rule) in
|
||||
.I mkfile
|
||||
is updated.
|
||||
.PP
|
||||
The environment variable
|
||||
.B $NPROC
|
||||
determines how many targets may be updated simultaneously.
|
||||
Some operating systems, e.g., Plan 9, set
|
||||
.B $NPROC
|
||||
automatically to the number of CPUs on the current machine.
|
||||
.PP
|
||||
Options are:
|
||||
.TP \w'\fL-d[egp]\ 'u
|
||||
.B -a
|
||||
Assume all targets to be out of date.
|
||||
Thus, everything is updated.
|
||||
.PD 0
|
||||
.TP
|
||||
.BR -d [ egp ]
|
||||
Produce debugging output
|
||||
.RB ( p
|
||||
is for parsing,
|
||||
.B g
|
||||
for graph building,
|
||||
.B e
|
||||
for execution).
|
||||
.TP
|
||||
.B -e
|
||||
Explain why each target is made.
|
||||
.TP
|
||||
.B -i
|
||||
Force any missing intermediate targets to be made.
|
||||
.TP
|
||||
.B -k
|
||||
Do as much work as possible in the face of errors.
|
||||
.TP
|
||||
.B -n
|
||||
Print, but do not execute, the commands
|
||||
needed to update the targets.
|
||||
.TP
|
||||
.B -s
|
||||
Make the command line arguments sequentially rather than in parallel.
|
||||
.TP
|
||||
.B -t
|
||||
Touch (update the modified date of) file targets, without
|
||||
executing any recipes.
|
||||
.TP
|
||||
.BI -w target1 , target2,...
|
||||
Pretend the modify time for each
|
||||
.I target
|
||||
is the current time; useful in conjunction with
|
||||
.B -n
|
||||
to learn what updates would be triggered by
|
||||
modifying the
|
||||
.IR targets .
|
||||
.PD
|
||||
.SS The \fLmkfile\fP
|
||||
A
|
||||
.I mkfile
|
||||
consists of
|
||||
.I assignments
|
||||
(described under `Environment') and
|
||||
.IR rules .
|
||||
A rule contains
|
||||
.I targets
|
||||
and a
|
||||
.IR tail .
|
||||
A target is a literal string
|
||||
and is normally a file name.
|
||||
The tail contains zero or more
|
||||
.I prerequisites
|
||||
and an optional
|
||||
.IR recipe ,
|
||||
which is a
|
||||
.B shell
|
||||
script.
|
||||
Each line of the recipe must begin with white space.
|
||||
A rule takes the form
|
||||
.IP
|
||||
.EX
|
||||
target: prereq1 prereq2
|
||||
\f2recipe using\fP prereq1, prereq2 \f2to build\fP target
|
||||
.EE
|
||||
.PP
|
||||
When the recipe is executed,
|
||||
the first character on every line is elided.
|
||||
.PP
|
||||
After the colon on the target line, a rule may specify
|
||||
.IR attributes ,
|
||||
described below.
|
||||
.PP
|
||||
A
|
||||
.I meta-rule
|
||||
has a target of the form
|
||||
.IB A % B
|
||||
where
|
||||
.I A
|
||||
and
|
||||
.I B
|
||||
are (possibly empty) strings.
|
||||
A meta-rule acts as a rule for any potential target whose
|
||||
name matches
|
||||
.IB A % B
|
||||
with
|
||||
.B %
|
||||
replaced by an arbitrary string, called the
|
||||
.IR stem .
|
||||
In interpreting a meta-rule,
|
||||
the stem is substituted for all occurrences of
|
||||
.B %
|
||||
in the prerequisite names.
|
||||
In the recipe of a meta-rule, the environment variable
|
||||
.B $stem
|
||||
contains the string matched by the
|
||||
.BR % .
|
||||
For example, a meta-rule to compile a C program
|
||||
might be:
|
||||
.IP
|
||||
.EX
|
||||
%: %.c
|
||||
cc -c $stem.c
|
||||
ld -o $stem $stem.o
|
||||
.EE
|
||||
.PP
|
||||
Meta-rules may contain an ampersand
|
||||
.B &
|
||||
rather than a percent sign
|
||||
.BR % .
|
||||
A
|
||||
.B %
|
||||
matches a maximal length string of any characters;
|
||||
an
|
||||
.B &
|
||||
matches a maximal length string of any characters except period
|
||||
or slash.
|
||||
.PP
|
||||
The text of the
|
||||
.I mkfile
|
||||
is processed as follows.
|
||||
Lines beginning with
|
||||
.B <
|
||||
followed by a file name are replaced by the contents of the named
|
||||
file.
|
||||
Lines beginning with
|
||||
.B "<|"
|
||||
followed by a file name are replaced by the output
|
||||
of the execution of the named
|
||||
file.
|
||||
Blank lines and comments, which run from unquoted
|
||||
.B #
|
||||
characters to the following newline, are deleted.
|
||||
The character sequence backslash-newline is deleted,
|
||||
so long lines in
|
||||
.I mkfile
|
||||
may be folded.
|
||||
Non-recipe lines are processed by substituting for
|
||||
.BI `{ command }
|
||||
the output of the
|
||||
.I command
|
||||
when run by
|
||||
.IR sh .
|
||||
References to variables are replaced by the variables' values.
|
||||
Special characters may be quoted using single quotes
|
||||
.BR \&''
|
||||
as in
|
||||
.IR sh (1).
|
||||
.PP
|
||||
Assignments and rules are distinguished by
|
||||
the first unquoted occurrence of
|
||||
.B :
|
||||
(rule)
|
||||
or
|
||||
.B =
|
||||
(assignment).
|
||||
.PP
|
||||
A later rule may modify or override an existing rule under the
|
||||
following conditions:
|
||||
.TP
|
||||
\-
|
||||
If the targets of the rules exactly match and one rule
|
||||
contains only a prerequisite clause and no recipe, the
|
||||
clause is added to the prerequisites of the other rule.
|
||||
If either or both targets are virtual, the recipe is
|
||||
always executed.
|
||||
.TP
|
||||
\-
|
||||
If the targets of the rules match exactly and the
|
||||
prerequisites do not match and both rules
|
||||
contain recipes,
|
||||
.I mk
|
||||
reports an ``ambiguous recipe'' error.
|
||||
.TP
|
||||
\-
|
||||
If the target and prerequisites of both rules match exactly,
|
||||
the second rule overrides the first.
|
||||
.SS Environment
|
||||
Rules may make use of
|
||||
shell
|
||||
environment variables.
|
||||
A legal reference of the form
|
||||
.B $OBJ
|
||||
or
|
||||
.B ${name}
|
||||
is expanded as in
|
||||
.IR sh (1).
|
||||
A reference of the form
|
||||
.BI ${name: A % B = C\fL%\fID\fL}\fR,
|
||||
where
|
||||
.I A, B, C, D
|
||||
are (possibly empty) strings,
|
||||
has the value formed by expanding
|
||||
.B $name
|
||||
and substituting
|
||||
.I C
|
||||
for
|
||||
.I A
|
||||
and
|
||||
.I D
|
||||
for
|
||||
.I B
|
||||
in each word in
|
||||
.B $name
|
||||
that matches pattern
|
||||
.IB A % B\f1.
|
||||
.PP
|
||||
Variables can be set by
|
||||
assignments of the form
|
||||
.I
|
||||
var\fL=\fR[\fIattr\fL=\fR]\fIvalue\fR
|
||||
.br
|
||||
Blanks in the
|
||||
.I value
|
||||
break it into words.
|
||||
Such variables are exported
|
||||
to the environment of
|
||||
recipes as they are executed, unless
|
||||
.BR U ,
|
||||
the only legal attribute
|
||||
.IR attr ,
|
||||
is present.
|
||||
The initial value of a variable is
|
||||
taken from (in increasing order of precedence)
|
||||
the default values below,
|
||||
.I mk's
|
||||
environment, the
|
||||
.IR mkfiles ,
|
||||
and any command line assignment as an argument to
|
||||
.IR mk .
|
||||
A variable assignment argument overrides the first (but not any subsequent)
|
||||
assignment to that variable.
|
||||
.PP
|
||||
The variable
|
||||
.B MKFLAGS
|
||||
contains all the option arguments (arguments starting with
|
||||
.L -
|
||||
or containing
|
||||
.LR = )
|
||||
and
|
||||
.B MKARGS
|
||||
contains all the targets in the call to
|
||||
.IR mk .
|
||||
.PP
|
||||
The variable
|
||||
.B MKSHELL
|
||||
contains the shell command line
|
||||
.I mk
|
||||
uses to run recipes.
|
||||
If the first word of the command ends in
|
||||
.B rc
|
||||
or
|
||||
.BR rcsh ,
|
||||
.I mk
|
||||
uses
|
||||
.IR rc (1)'s
|
||||
quoting rules; otherwise it uses
|
||||
.IR sh (1)'s.
|
||||
The
|
||||
.B MKSHELL
|
||||
variable is consulted when the mkfile is read, not when it is executed,
|
||||
so that different shells can be used within a single mkfile:
|
||||
.IP
|
||||
.EX
|
||||
MKSHELL=$PLAN9/bin/rc
|
||||
use-rc:V:
|
||||
for(i in a b c) echo $i
|
||||
|
||||
MKSHELL=sh
|
||||
use-sh:V:
|
||||
for i in a b c; do echo $i; done
|
||||
.EE
|
||||
.LP
|
||||
Mkfiles included via
|
||||
.B <
|
||||
or
|
||||
.B <|
|
||||
.RI ( q.v. )
|
||||
see their own private copy of
|
||||
.BR MKSHELL ,
|
||||
which always starts set to
|
||||
.BR sh .
|
||||
.PP
|
||||
Dynamic information may be included in the mkfile by using a line of the form
|
||||
.IP
|
||||
\fR<|\fIcommand\fR \fIargs\fR
|
||||
.LP
|
||||
This runs the command
|
||||
.I command
|
||||
with the given arguments
|
||||
.I args
|
||||
and pipes its standard output to
|
||||
.I mk
|
||||
to be included as part of the mkfile. For instance, the Inferno kernels
|
||||
use this technique
|
||||
to run a shell command with an awk script and a configuration
|
||||
file as arguments in order for
|
||||
the
|
||||
.I awk
|
||||
script to process the file and output a set of variables and their values.
|
||||
.SS Execution
|
||||
.PP
|
||||
During execution,
|
||||
.I mk
|
||||
determines which targets must be updated, and in what order,
|
||||
to build the
|
||||
.I names
|
||||
specified on the command line.
|
||||
It then runs the associated recipes.
|
||||
.PP
|
||||
A target is considered up to date if it has no prerequisites or
|
||||
if all its prerequisites are up to date and it is newer
|
||||
than all its prerequisites.
|
||||
Once the recipe for a target has executed, the target is
|
||||
considered up to date.
|
||||
.PP
|
||||
The date stamp
|
||||
used to determine if a target is up to date is computed
|
||||
differently for different types of targets.
|
||||
If a target is
|
||||
.I virtual
|
||||
(the target of a rule with the
|
||||
.B V
|
||||
attribute),
|
||||
its date stamp is initially zero; when the target is
|
||||
updated the date stamp is set to
|
||||
the most recent date stamp of its prerequisites.
|
||||
Otherwise, if a target does not exist as a file,
|
||||
its date stamp is set to the most recent date stamp of its prerequisites,
|
||||
or zero if it has no prerequisites.
|
||||
Otherwise, the target is the name of a file and
|
||||
the target's date stamp is always that file's modification date.
|
||||
The date stamp is computed when the target is needed in
|
||||
the execution of a rule; it is not a static value.
|
||||
.PP
|
||||
Nonexistent targets that have prerequisites
|
||||
and are themselves prerequisites are treated specially.
|
||||
Such a target
|
||||
.I t
|
||||
is given the date stamp of its most recent prerequisite
|
||||
and if this causes all the targets which have
|
||||
.I t
|
||||
as a prerequisite to be up to date,
|
||||
.I t
|
||||
is considered up to date.
|
||||
Otherwise,
|
||||
.I t
|
||||
is made in the normal fashion.
|
||||
The
|
||||
.B -i
|
||||
flag overrides this special treatment.
|
||||
.PP
|
||||
Files may be made in any order that respects
|
||||
the preceding restrictions.
|
||||
.PP
|
||||
A recipe is executed by supplying the recipe as standard input to
|
||||
the command
|
||||
.BR /bin/sh .
|
||||
(Note that unlike
|
||||
.IR make ,
|
||||
.I mk
|
||||
feeds the entire recipe to the shell rather than running each line
|
||||
of the recipe separately.)
|
||||
The environment is augmented by the following variables:
|
||||
.TP 14
|
||||
.B $alltarget
|
||||
all the targets of this rule.
|
||||
.TP
|
||||
.B $newprereq
|
||||
the prerequisites that caused this rule to execute.
|
||||
.TP
|
||||
.B $newmember
|
||||
the prerequisites that are members of an aggregate
|
||||
that caused this rule to execute.
|
||||
When the prerequisites of a rule are members of an
|
||||
aggregate,
|
||||
.B $newprereq
|
||||
contains the name of the aggregate and out of date
|
||||
members, while
|
||||
.B $newmember
|
||||
contains only the name of the members.
|
||||
.TP
|
||||
.B $nproc
|
||||
the process slot for this recipe.
|
||||
It satisfies
|
||||
.RB 0≤ $nproc < $NPROC .
|
||||
.TP
|
||||
.B $pid
|
||||
the process id for the
|
||||
.I mk
|
||||
executing the recipe.
|
||||
.TP
|
||||
.B $prereq
|
||||
all the prerequisites for this rule.
|
||||
.TP
|
||||
.B $stem
|
||||
if this is a meta-rule,
|
||||
.B $stem
|
||||
is the string that matched
|
||||
.B %
|
||||
or
|
||||
.BR & .
|
||||
Otherwise, it is empty.
|
||||
For regular expression meta-rules (see below), the variables
|
||||
.LR stem0 ", ...,"
|
||||
.L stem9
|
||||
are set to the corresponding subexpressions.
|
||||
.TP
|
||||
.B $target
|
||||
the targets for this rule that need to be remade.
|
||||
.PP
|
||||
These variables are available only during the execution of a recipe,
|
||||
not while evaluating the
|
||||
.IR mkfile .
|
||||
.PP
|
||||
Unless the rule has the
|
||||
.B Q
|
||||
attribute,
|
||||
the recipe is printed prior to execution
|
||||
with recognizable environment variables expanded.
|
||||
Commands returning error status
|
||||
cause
|
||||
.I mk
|
||||
to terminate.
|
||||
.PP
|
||||
Recipes and backquoted
|
||||
.B rc
|
||||
commands in places such as assignments
|
||||
execute in a copy of
|
||||
.I mk's
|
||||
environment; changes they make to
|
||||
environment variables are not visible from
|
||||
.IR mk .
|
||||
.PP
|
||||
Variable substitution in a rule is done when
|
||||
the rule is read; variable substitution in the recipe is done
|
||||
when the recipe is executed. For example:
|
||||
.IP
|
||||
.EX
|
||||
bar=a.c
|
||||
foo: $bar
|
||||
$CC -o foo $bar
|
||||
bar=b.c
|
||||
.EE
|
||||
.PP
|
||||
will compile
|
||||
.B b.c
|
||||
into
|
||||
.BR foo ,
|
||||
if
|
||||
.B a.c
|
||||
is newer than
|
||||
.BR foo .
|
||||
.SS Aggregates
|
||||
Names of the form
|
||||
.IR a ( b )
|
||||
refer to member
|
||||
.I b
|
||||
of the aggregate
|
||||
.IR a .
|
||||
.SS Attributes
|
||||
The colon separating the target from the prerequisites
|
||||
may be
|
||||
immediately followed by
|
||||
.I attributes
|
||||
and another colon.
|
||||
The attributes are:
|
||||
.TP
|
||||
.B D
|
||||
If the recipe exits with a non-null status, the target is deleted.
|
||||
.TP
|
||||
.B E
|
||||
Continue execution if the recipe draws errors.
|
||||
.TP
|
||||
.B N
|
||||
If there is no recipe, the target has its time updated.
|
||||
.TP
|
||||
.B n
|
||||
The rule is a meta-rule that cannot be a target of a virtual rule.
|
||||
Only files match the pattern in the target.
|
||||
.TP
|
||||
.B P
|
||||
The characters after the
|
||||
.B P
|
||||
until the terminating
|
||||
.B :
|
||||
are taken as a program name.
|
||||
It will be invoked as
|
||||
.B "sh -c prog 'arg1' 'arg2'"
|
||||
and should return a zero exit status
|
||||
if and only if arg1 is up to date with respect to arg2.
|
||||
Date stamps are still propagated in the normal way.
|
||||
.TP
|
||||
.B Q
|
||||
The recipe is not printed prior to execution.
|
||||
.TP
|
||||
.B R
|
||||
The rule is a meta-rule using regular expressions.
|
||||
In the rule,
|
||||
.B %
|
||||
has no special meaning.
|
||||
The target is interpreted as a regular expression as defined in
|
||||
.IR regexp9 (7).
|
||||
The prerequisites may contain references
|
||||
to subexpressions in form
|
||||
.BI \e n\f1,
|
||||
as in the substitute command of
|
||||
.IR sed (1).
|
||||
.TP
|
||||
.B U
|
||||
The targets are considered to have been updated
|
||||
even if the recipe did not do so.
|
||||
.TP
|
||||
.B V
|
||||
The targets of this rule are marked as virtual.
|
||||
They are distinct from files of the same name.
|
||||
.PD
|
||||
.SH EXAMPLES
|
||||
A simple mkfile to compile a program:
|
||||
.IP
|
||||
.EX
|
||||
.ta 8n +8n +8n +8n +8n +8n +8n
|
||||
</$objtype/mkfile
|
||||
|
||||
prog: a.$O b.$O c.$O
|
||||
$LD $LDFLAGS -o $target $prereq
|
||||
|
||||
%.$O: %.c
|
||||
$CC $CFLAGS $stem.c
|
||||
.EE
|
||||
.PP
|
||||
Override flag settings in the mkfile:
|
||||
.IP
|
||||
.EX
|
||||
% mk target 'CFLAGS=-S -w'
|
||||
.EE
|
||||
.PP
|
||||
Maintain a library:
|
||||
.IP
|
||||
.EX
|
||||
libc.a(%.$O):N: %.$O
|
||||
libc.a: libc.a(abs.$O) libc.a(access.$O) libc.a(alarm.$O) ...
|
||||
ar r libc.a $newmember
|
||||
.EE
|
||||
.PP
|
||||
String expression variables to derive names from a master list:
|
||||
.IP
|
||||
.EX
|
||||
NAMES=alloc arc bquote builtins expand main match mk var word
|
||||
OBJ=${NAMES:%=%.$O}
|
||||
.EE
|
||||
.PP
|
||||
Regular expression meta-rules:
|
||||
.IP
|
||||
.EX
|
||||
([^/]*)/(.*)\e.$O:R: \e1/\e2.c
|
||||
cd $stem1; $CC $CFLAGS $stem2.c
|
||||
.EE
|
||||
.PP
|
||||
A correct way to deal with
|
||||
.IR yacc (1)
|
||||
grammars.
|
||||
The file
|
||||
.B lex.c
|
||||
includes the file
|
||||
.B x.tab.h
|
||||
rather than
|
||||
.B y.tab.h
|
||||
in order to reflect changes in content, not just modification time.
|
||||
.IP
|
||||
.EX
|
||||
lex.$O: x.tab.h
|
||||
x.tab.h: y.tab.h
|
||||
cmp -s x.tab.h y.tab.h || cp y.tab.h x.tab.h
|
||||
y.tab.c y.tab.h: gram.y
|
||||
$YACC -d gram.y
|
||||
.EE
|
||||
.PP
|
||||
The above example could also use the
|
||||
.B P
|
||||
attribute for the
|
||||
.B x.tab.h
|
||||
rule:
|
||||
.IP
|
||||
.EX
|
||||
x.tab.h:Pcmp -s: y.tab.h
|
||||
cp y.tab.h x.tab.h
|
||||
.EE
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR sh (1),
|
||||
.IR regexp9 (7)
|
||||
.PP
|
||||
A. Hume,
|
||||
``Mk: a Successor to Make''
|
||||
(Tenth Edition Research Unix Manuals).
|
||||
.PP
|
||||
Andrew G. Hume and Bob Flandrena,
|
||||
``Maintaining Files on Plan 9 with Mk''.
|
||||
.SH HISTORY
|
||||
Andrew Hume wrote
|
||||
.I mk
|
||||
for Tenth Edition Research Unix.
|
||||
It was later ported to Plan 9.
|
||||
This software is a port of the Plan 9 version back to Unix.
|
||||
.SH BUGS
|
||||
Identical recipes for regular expression meta-rules only have one target.
|
||||
.PP
|
||||
Seemingly appropriate input like
|
||||
.B CFLAGS=-DHZ=60
|
||||
is parsed as an erroneous attribute; correct it by inserting
|
||||
a space after the first
|
||||
.LR = .
|
||||
.PP
|
||||
The recipes printed by
|
||||
.I mk
|
||||
before being passed to
|
||||
the shell
|
||||
for execution are sometimes erroneously expanded
|
||||
for printing. Don't trust what's printed; rely
|
||||
on what the shell
|
||||
does.
|
||||
48
unix/man/mkfile
Normal file
48
unix/man/mkfile
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Manual page sources in this directory; they are prerequisites of the
# "all" target and the inputs to the troff-to-HTML rules below.
MAN=\
|
||||
isalpharune.3\
|
||||
rune.3\
|
||||
runestrcat.3\
|
||||
utf.7\
|
||||
print.3\
|
||||
fmtinstall.3\
|
||||
quote.3\
|
||||
fmtstrtod.3\
|
||||
bio.3\
|
||||
regexp9.3\
|
||||
regexp9.7\
|
||||
mk.1\
|
||||
|
||||
# HTML pages generated from the manual sources, one per page.
# Naming: <name>.<section> becomes <name><section>.html (e.g. mk.1 -> mk1.html).
HTML=\
|
||||
isalpharune3.html\
|
||||
rune3.html\
|
||||
runestrcat3.html\
|
||||
utf7.html\
|
||||
print3.html\
|
||||
fmtinstall3.html\
|
||||
quote3.html\
|
||||
fmtstrtod3.html\
|
||||
bio3.html\
|
||||
regexp93.html\
|
||||
regexp97.html\
|
||||
mk1.html\
|
||||
|
||||
# Default target. V marks it virtual (not a file); it depends on every
# manual source and every generated HTML page.
all:V: $MAN $HTML
|
||||
|
||||
# Title string handed to troff2html via -t in the HTML rules.
title='Ported from Plan 9'
|
||||
# Run the recipes that follow with rc. mk consults MKSHELL as the mkfile
# is read, so this assignment must stay ahead of the rules it affects.
MKSHELL=$PLAN9/bin/rc
|
||||
|
||||
# Section 1 pages: build <name>1.html from <name>.1.
# D deletes the target if the recipe fails, so a half-written page is not
# left behind. The recipe formats the page with troff -manhtml, converts it
# with troff2html (titled $title), then runs ./fixurls on the result
# (presumably to rewrite links -- confirm against the fixurls script).
%1.html:D: %.1
|
||||
9 troff -manhtml $prereq | troff2html -t $title > $target
|
||||
./fixurls $target
|
||||
|
||||
# Section 3 pages: build <name>3.html from <name>.3.
# D deletes the target if the recipe fails. The recipe formats the page
# with troff -manhtml, converts it with troff2html (titled $title), then
# runs ./fixurls on the generated file.
%3.html:D: %.3
|
||||
9 troff -manhtml $prereq | troff2html -t $title > $target
|
||||
./fixurls $target
|
||||
|
||||
# Section 7 pages: build <name>7.html from <name>.7.
# D deletes the target if the recipe fails. The recipe formats the page
# with troff -manhtml, converts it with troff2html (titled $title), then
# runs ./fixurls on the generated file.
%7.html:D: %.7
|
||||
9 troff -manhtml $prereq | troff2html -t $title > $target
|
||||
./fixurls $target
|
||||
|
||||
# Virtual target: copy every HTML file in this directory to the remote
# host "swtch" with rsync over ssh. It has no prerequisites, so it does
# not rebuild the pages first.
push:V:
|
||||
rsync -e ssh *.html swtch:www/swtch.com/plan9port/unix/man
|
||||
474
unix/man/print.3
Normal file
474
unix/man/print.3
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
.\" diffs from /usr/local/plan9/man/man3/print.3:
|
||||
.\"
|
||||
.\" - include different headers
|
||||
.\" - drop reference to bio(3)
|
||||
.\" - change exits to exit
|
||||
.\" - text about unsigned verbs
|
||||
.\" - source pointer
|
||||
.\"
|
||||
.TH PRINT 3
|
||||
.SH NAME
|
||||
print, fprint, sprint, snprint, seprint, smprint, runesprint, runesnprint, runeseprint, runesmprint, vfprint, vsnprint, vseprint, vsmprint, runevsnprint, runevseprint, runevsmprint \- print formatted output
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.PP
|
||||
.B #include <fmt.h>
|
||||
.PP
|
||||
.ta \w'\fLchar* 'u
|
||||
.B
|
||||
int print(char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int fprint(int fd, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int sprint(char *s, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int snprint(char *s, int len, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
char* seprint(char *s, char *e, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
char* smprint(char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int runesprint(Rune *s, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int runesnprint(Rune *s, int len, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
Rune* runeseprint(Rune *s, Rune *e, char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
Rune* runesmprint(char *format, ...)
|
||||
.PP
|
||||
.B
|
||||
int vfprint(int fd, char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
int vsnprint(char *s, int len, char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
char* vseprint(char *s, char *e, char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
char* vsmprint(char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
int runevsnprint(Rune *s, int len, char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
Rune* runevseprint(Rune *s, Rune *e, char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
Rune* runevsmprint(char *format, va_list v)
|
||||
.PP
|
||||
.B
|
||||
.SH DESCRIPTION
|
||||
.I Print
|
||||
writes text to the standard output.
|
||||
.I Fprint
|
||||
writes to the named output
|
||||
file descriptor:
|
||||
a buffered form
|
||||
is described in
|
||||
.IR bio (3).
|
||||
.I Sprint
|
||||
places text
|
||||
followed by the NUL character
|
||||
.RB ( \e0 )
|
||||
in consecutive bytes starting at
|
||||
.IR s ;
|
||||
it is the user's responsibility to ensure that
|
||||
enough storage is available.
|
||||
Each function returns the number of bytes
|
||||
transmitted (not including the NUL
|
||||
in the case of
|
||||
.IR sprint ),
|
||||
or
|
||||
a negative value if an output error was encountered.
|
||||
.PP
|
||||
.I Snprint
|
||||
is like
|
||||
.IR sprint ,
|
||||
but will not place more than
|
||||
.I len
|
||||
bytes in
|
||||
.IR s .
|
||||
Its result is always NUL-terminated and holds the maximal
|
||||
number of complete UTF-8 characters that can fit.
|
||||
.I Seprint
|
||||
is like
|
||||
.IR snprint ,
|
||||
except that the end is indicated by a pointer
|
||||
.I e
|
||||
rather than a count and the return value points to the terminating NUL of the
|
||||
resulting string.
|
||||
.I Smprint
|
||||
is like
|
||||
.IR sprint ,
|
||||
except that it prints into and returns a string of the required length, which is
|
||||
allocated by
|
||||
.IR malloc (3).
|
||||
.PP
|
||||
The routines
|
||||
.IR runesprint ,
|
||||
.IR runesnprint ,
|
||||
.IR runeseprint ,
|
||||
and
|
||||
.I runesmprint
|
||||
are the same as
|
||||
.IR sprint ,
|
||||
.IR snprint ,
|
||||
.IR seprint
|
||||
and
|
||||
.I smprint
|
||||
except that their output is rune strings instead of byte strings.
|
||||
.PP
|
||||
Finally, the routines
|
||||
.IR vfprint ,
|
||||
.IR vsnprint ,
|
||||
.IR vseprint ,
|
||||
.IR vsmprint ,
|
||||
.IR runevsnprint ,
|
||||
.IR runevseprint ,
|
||||
and
|
||||
.I runevsmprint
|
||||
are like their
|
||||
.BR v-less
|
||||
relatives except they take as arguments a
|
||||
.B va_list
|
||||
parameter, so they can be called within a variadic function.
|
||||
The Example section shows a representative usage.
|
||||
.PP
|
||||
Each of these functions
|
||||
converts, formats, and prints its
|
||||
trailing arguments
|
||||
under control of a
|
||||
.IR format
|
||||
string.
|
||||
The
|
||||
format
|
||||
contains two types of objects:
|
||||
plain characters, which are simply copied to the
|
||||
output stream,
|
||||
and conversion specifications,
|
||||
each of which results in fetching of
|
||||
zero or more
|
||||
arguments.
|
||||
The results are undefined if there are arguments of the
|
||||
wrong type or too few
|
||||
arguments for the format.
|
||||
If the format is exhausted while
|
||||
arguments remain, the excess
|
||||
is ignored.
|
||||
.PP
|
||||
Each conversion specification has the following format:
|
||||
.IP
|
||||
.B "% [flags] verb"
|
||||
.PP
|
||||
The verb is a single character and each flag is a single character or a
|
||||
(decimal) numeric string.
|
||||
Up to two numeric strings may be used;
|
||||
the first is called
|
||||
.IR width ,
|
||||
the second
|
||||
.IR precision .
|
||||
A period can be used to separate them, and if the period is
|
||||
present then
|
||||
.I width
|
||||
and
|
||||
.I precision
|
||||
are taken to be zero if missing, otherwise they are `omitted'.
|
||||
Either or both of the numbers may be replaced with the character
|
||||
.BR * ,
|
||||
meaning that the actual number will be obtained from the argument list
|
||||
as an integer.
|
||||
The flags and numbers are arguments to
|
||||
the
|
||||
.I verb
|
||||
described below.
|
||||
.PP
|
||||
The numeric verbs
|
||||
.BR d ,
|
||||
.BR i ,
|
||||
.BR u ,
|
||||
.BR o ,
|
||||
.BR b ,
|
||||
.BR x ,
|
||||
and
|
||||
.B X
|
||||
format their arguments in decimal, decimal,
|
||||
unsigned decimal, octal, binary, hexadecimal, and upper case hexadecimal.
|
||||
Each interprets the flags
|
||||
.BR 0 ,
|
||||
.BR h ,
|
||||
.BR hh ,
|
||||
.BR l ,
|
||||
.BR + ,
|
||||
.BR - ,
|
||||
.BR , ,
|
||||
and
|
||||
.B #
|
||||
to mean pad with zeros,
|
||||
short, byte, long, always print a sign, left justified, commas every three digits,
|
||||
and alternate format.
|
||||
Also, a space character in the flag
|
||||
position is like
|
||||
.BR + ,
|
||||
but prints a space instead of a plus sign for non-negative values.
|
||||
If neither
|
||||
short nor long is specified,
|
||||
then the argument is an
|
||||
.BR int .
|
||||
If an unsigned verb is specified,
|
||||
then the argument is interpreted as a
|
||||
positive number and no sign is output;
|
||||
space and
|
||||
.B +
|
||||
flags are ignored for unsigned verbs.
|
||||
If two
|
||||
.B l
|
||||
flags are given,
|
||||
then the argument is interpreted as a
|
||||
.B vlong
|
||||
(usually an 8-byte, sometimes a 4-byte integer).
|
||||
If
|
||||
.I precision
|
||||
is not omitted, the number is padded on the left with zeros
|
||||
until at least
|
||||
.I precision
|
||||
digits appear.
|
||||
If
|
||||
.I precision
|
||||
is explicitly 0, and the number is 0,
|
||||
no digits are generated, and alternate formatting
|
||||
does not apply.
|
||||
Then, if alternate format is specified,
|
||||
for
|
||||
.B o
|
||||
conversion, the number is preceded by a
|
||||
.B 0
|
||||
if it doesn't already begin with one.
|
||||
For non-zero numbers and
|
||||
.B x
|
||||
conversion, the number is preceded by
|
||||
.BR 0x ;
|
||||
for
|
||||
.B X
|
||||
conversion, the number is preceded by
|
||||
.BR 0X .
|
||||
Finally, if
|
||||
.I width
|
||||
is not omitted, the number is padded on the left (or right, if
|
||||
left justification is specified) with enough blanks to
|
||||
make the field at least
|
||||
.I width
|
||||
characters long.
|
||||
.PP
|
||||
The floating point verbs
|
||||
.BR f ,
|
||||
.BR e ,
|
||||
.BR E ,
|
||||
.BR g ,
|
||||
and
|
||||
.B G
|
||||
take a
|
||||
.B double
|
||||
argument.
|
||||
Each interprets the flags
|
||||
.BR 0 ,
|
||||
.BR L ,
|
||||
.BR + ,
|
||||
.BR - ,
|
||||
and
|
||||
.B #
|
||||
to mean pad with zeros,
|
||||
long double argument,
|
||||
always print a sign,
|
||||
left justified,
|
||||
and
|
||||
alternate format.
|
||||
.I Width
|
||||
is the minimum field width and,
|
||||
if the converted value takes up less than
|
||||
.I width
|
||||
characters, it is padded on the left (or right, if `left justified')
|
||||
with spaces.
|
||||
.I Precision
|
||||
is the number of digits that are converted after the decimal place for
|
||||
.BR e ,
|
||||
.BR E ,
|
||||
and
|
||||
.B f
|
||||
conversions,
|
||||
and
|
||||
.I precision
|
||||
is the maximum number of significant digits for
|
||||
.B g
|
||||
and
|
||||
.B G
|
||||
conversions.
|
||||
The
|
||||
.B f
|
||||
verb produces output of the form
|
||||
.RB [ - ] digits [ .digits\fR].
|
||||
.B E
|
||||
conversion appends an exponent
|
||||
.BR E [ - ] digits ,
|
||||
and
|
||||
.B e
|
||||
conversion appends an exponent
|
||||
.BR e [ - ] digits .
|
||||
The
|
||||
.B g
|
||||
verb will output the argument in either
|
||||
.B e
|
||||
or
|
||||
.B f
|
||||
with the goal of producing the smallest output.
|
||||
Also, trailing zeros are omitted from the fraction part of
|
||||
the output, and a trailing decimal point appears only if it is followed
|
||||
by a digit.
|
||||
The
|
||||
.B G
|
||||
verb is similar, but uses
|
||||
.B E
|
||||
format instead of
|
||||
.BR e .
|
||||
When alternate format is specified, the result will always contain a decimal point,
|
||||
and for
|
||||
.B g
|
||||
and
|
||||
.B G
|
||||
conversions, trailing zeros are not removed.
|
||||
.PP
|
||||
The
|
||||
.B s
|
||||
verb copies a string
|
||||
(pointer to
|
||||
.BR char )
|
||||
to the output.
|
||||
The number of characters copied
|
||||
.RI ( n )
|
||||
is the minimum
|
||||
of the size of the string and
|
||||
.IR precision .
|
||||
These
|
||||
.I n
|
||||
characters are justified within a field of
|
||||
.I width
|
||||
characters as described above.
|
||||
If a
|
||||
.I precision
|
||||
is given, it is safe for the string not to be NUL-terminated
|
||||
as long as it is at least
|
||||
.I precision
|
||||
characters (not bytes!) long.
|
||||
The
|
||||
.B S
|
||||
verb is similar, but it interprets its pointer as an array
|
||||
of runes (see
|
||||
.IR utf (7));
|
||||
the runes are converted to
|
||||
.SM UTF
|
||||
before output.
|
||||
.PP
|
||||
The
|
||||
.B c
|
||||
verb copies a single
|
||||
.B char
|
||||
(promoted to
|
||||
.BR int )
|
||||
justified within a field of
|
||||
.I width
|
||||
characters as described above.
|
||||
The
|
||||
.B C
|
||||
verb is similar, but works on runes.
|
||||
.PP
|
||||
The
|
||||
.B p
|
||||
verb formats a pointer value.
|
||||
At the moment, it is a synonym for
|
||||
.BR x ,
|
||||
but that will change if pointers and integers are different sizes.
|
||||
.PP
|
||||
The
|
||||
.B r
|
||||
verb takes no arguments; it copies the error string returned by a call to
|
||||
.IR strerror (3)
|
||||
with an argument of
|
||||
.IR errno .
|
||||
.PP
|
||||
Custom verbs may be installed using
|
||||
.IR fmtinstall (3).
|
||||
.SH EXAMPLE
|
||||
This function prints an error message with a variable
|
||||
number of arguments and then quits.
|
||||
.IP
|
||||
.EX
|
||||
.ta 6n +6n +6n
|
||||
void fatal(char *msg, ...)
|
||||
{
|
||||
char buf[1024], *out;
|
||||
va_list arg;
|
||||
|
||||
out = seprint(buf, buf+sizeof buf, "Fatal error: ");
|
||||
va_start(arg, msg);
|
||||
out = vseprint(out, buf+sizeof buf, msg, arg);
|
||||
va_end(arg);
|
||||
write(2, buf, out-buf);
|
||||
exit(1);
|
||||
}
|
||||
.EE
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR fmtinstall (3),
|
||||
.IR fprintf (3),
|
||||
.IR utf (7)
|
||||
.SH DIAGNOSTICS
|
||||
Routines that write to a file descriptor or call
|
||||
.IR malloc
|
||||
set
|
||||
.IR errstr .
|
||||
.SH BUGS
|
||||
The formatting is close to that specified for ANSI
|
||||
.IR fprintf (3);
|
||||
the main difference is that
|
||||
.B b
|
||||
and
|
||||
.B r
|
||||
are not in ANSI and some
|
||||
.B C9X
|
||||
verbs and syntax are missing.
|
||||
Also, and distinctly not a bug,
|
||||
.I print
|
||||
and friends generate
|
||||
.SM UTF
|
||||
rather than
|
||||
.SM ASCII.
|
||||
.PP
|
||||
There is no
|
||||
.IR runeprint ,
|
||||
.IR runefprint ,
|
||||
etc. because runes are byte-order dependent and should not be written directly to a file; use the
|
||||
UTF output of
|
||||
.I print
|
||||
or
|
||||
.I fprint
|
||||
instead.
|
||||
Also,
|
||||
.I sprint
|
||||
is deprecated for safety reasons; use
|
||||
.IR snprint ,
|
||||
.IR seprint ,
|
||||
or
|
||||
.I smprint
|
||||
instead.
|
||||
Safety also precludes the existence of
|
||||
.IR runesprint .
|
||||
151
unix/man/quote.3
Normal file
151
unix/man/quote.3
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
.TH QUOTE 3
|
||||
.SH NAME
|
||||
quotestrdup, quoterunestrdup, unquotestrdup, unquoterunestrdup, quotestrfmt, quoterunestrfmt, quotefmtinstall, fmtdoquote \- quoted character strings
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.br
|
||||
.B #include <fmt.h>
|
||||
.PP
|
||||
.B
|
||||
char *quotestrdup(char *s)
|
||||
.PP
|
||||
.B
|
||||
Rune *quoterunestrdup(Rune *s)
|
||||
.PP
|
||||
.B
|
||||
char *unquotestrdup(char *s)
|
||||
.PP
|
||||
.B
|
||||
Rune *unquoterunestrdup(Rune *s)
|
||||
.PP
|
||||
.B
|
||||
int quotestrfmt(Fmt*)
|
||||
.PP
|
||||
.B
|
||||
int quoterunestrfmt(Fmt*)
|
||||
.PP
|
||||
.B
|
||||
void quotefmtinstall(void)
|
||||
.PP
|
||||
.B
|
||||
int (*fmtdoquote)(int c)
|
||||
.PP
|
||||
.SH DESCRIPTION
|
||||
These routines manipulate character strings, either adding or removing
|
||||
quotes as necessary.
|
||||
In the quoted form, the strings are in the style of
|
||||
.IR rc (1),
|
||||
with single quotes surrounding the string.
|
||||
Embedded single quotes are indicated by a doubled single quote.
|
||||
For instance,
|
||||
.IP
|
||||
.EX
|
||||
Don't worry!
|
||||
.EE
|
||||
.PP
|
||||
when quoted becomes
|
||||
.IP
|
||||
.EX
|
||||
\&'Don''t worry!'
|
||||
.EE
|
||||
.PP
|
||||
The empty string is represented by two quotes,
|
||||
.BR '' .
|
||||
.PP
|
||||
The first four functions act as variants of
|
||||
.B strdup
|
||||
(see
|
||||
.IR strcat (3)).
|
||||
Each returns a
|
||||
freshly allocated copy of the string, created using
|
||||
.IR malloc (3).
|
||||
.I Quotestrdup
|
||||
returns a quoted copy of
|
||||
.IR s ,
|
||||
while
|
||||
.I unquotestrdup
|
||||
returns a copy of
|
||||
.IR s
|
||||
with the quotes evaluated.
|
||||
The
|
||||
.I rune
|
||||
versions of these functions do the same for
|
||||
.CW Rune
|
||||
strings (see
|
||||
.IR runestrcat (3)).
|
||||
.PP
|
||||
The string returned by
|
||||
.I quotestrdup
|
||||
or
|
||||
.I quoterunestrdup
|
||||
has the following properties:
|
||||
.TP
|
||||
1.
|
||||
If the original string
|
||||
.IR s
|
||||
is empty, the returned string is
|
||||
.BR '' .
|
||||
.TP
|
||||
2.
|
||||
If
|
||||
.I s
|
||||
contains no quotes, blanks, or control characters,
|
||||
the returned string is identical to
|
||||
.IR s .
|
||||
.TP
|
||||
3.
|
||||
If
|
||||
.I s
|
||||
needs quotes to be added, the first character of the returned
|
||||
string will be a quote.
|
||||
For example,
|
||||
.B hello\ world
|
||||
becomes
|
||||
.B \&'hello\ world'
|
||||
not
|
||||
.BR hello'\ 'world .
|
||||
.PP
|
||||
The function pointer
|
||||
.I fmtdoquote
|
||||
is
|
||||
.B nil
|
||||
by default.
|
||||
If it is non-nil, characters are passed to that function to see if they should
|
||||
be quoted.
|
||||
This mechanism allows programs to specify that
|
||||
characters other than blanks, control characters, or quotes be quoted.
|
||||
Regardless of the return value of
|
||||
.IR *fmtdoquote ,
|
||||
blanks, control characters, and quotes are always quoted.
|
||||
.I Needsrcquote
|
||||
is provided as a
|
||||
.I fmtdoquote
|
||||
function that flags any character special to
|
||||
.IR rc (1).
|
||||
.PP
|
||||
.I Quotestrfmt
|
||||
and
|
||||
.I quoterunestrfmt
|
||||
are
|
||||
.IR print (3)
|
||||
formatting routines that produce quoted strings as output.
|
||||
They may be installed by hand, but
|
||||
.I quotefmtinstall
|
||||
installs them under the standard format characters
|
||||
.B q
|
||||
and
|
||||
.BR Q .
|
||||
(They are not installed automatically.)
|
||||
If the format string includes the alternate format character
|
||||
.BR # ,
|
||||
for example
|
||||
.BR %#q ,
|
||||
the printed string will always be quoted; otherwise quotes will only be provided if necessary
|
||||
to avoid ambiguity.
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH "SEE ALSO"
|
||||
.IR rc (1),
|
||||
.IR malloc (3),
|
||||
.IR print (3),
|
||||
.IR strcat (3)
|
||||
212
unix/man/regexp9.3
Normal file
212
unix/man/regexp9.3
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
.TH REGEXP9 3
|
||||
.SH NAME
|
||||
regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression
|
||||
.SH SYNOPSIS
|
||||
.B #include <utf.h>
|
||||
.br
|
||||
.B #include <fmt.h>
|
||||
.br
|
||||
.B #include <regexp9.h>
|
||||
.PP
|
||||
.ta \w'\fLRegprog 'u
|
||||
.B
|
||||
Reprog *regcomp(char *exp)
|
||||
.PP
|
||||
.B
|
||||
Reprog *regcomplit(char *exp)
|
||||
.PP
|
||||
.B
|
||||
Reprog *regcompnl(char *exp)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
int regexec(Reprog *prog, char *string, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
int rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
|
||||
.PP
|
||||
.B
|
||||
void regerror(char *msg)
|
||||
.SH DESCRIPTION
|
||||
.I Regcomp
|
||||
compiles a
|
||||
regular expression and returns
|
||||
a pointer to the generated description.
|
||||
The space is allocated by
|
||||
.IR malloc (3)
|
||||
and may be released by
|
||||
.IR free .
|
||||
Regular expressions are exactly as in
|
||||
.IR regexp9 (7).
|
||||
.PP
|
||||
.I Regcomplit
|
||||
is like
|
||||
.I regcomp
|
||||
except that all characters are treated literally.
|
||||
.I Regcompnl
|
||||
is like
|
||||
.I regcomp
|
||||
except that the
|
||||
.B .
|
||||
metacharacter matches all characters, including newlines.
|
||||
.PP
|
||||
.I Regexec
|
||||
matches a null-terminated
|
||||
.I string
|
||||
against the compiled regular expression in
|
||||
.IR prog .
|
||||
If it matches,
|
||||
.I regexec
|
||||
returns
|
||||
.B 1
|
||||
and fills in the array
|
||||
.I match
|
||||
with character pointers to the substrings of
|
||||
.I string
|
||||
that correspond to the
|
||||
parenthesized subexpressions of
|
||||
.IR exp :
|
||||
.BI match[ i ].sp
|
||||
points to the beginning and
|
||||
.BI match[ i ].ep
|
||||
points just beyond
|
||||
the end of the
|
||||
.IR i th
|
||||
substring.
|
||||
(Subexpression
|
||||
.I i
|
||||
begins at the
|
||||
.IR i th
|
||||
left parenthesis, counting from 1.)
|
||||
Pointers in
|
||||
.B match[0]
|
||||
pick out the substring that corresponds to
|
||||
the whole regular expression.
|
||||
Unused elements of
|
||||
.I match
|
||||
are filled with zeros.
|
||||
Matches involving
|
||||
.LR * ,
|
||||
.LR + ,
|
||||
and
|
||||
.L ?
|
||||
are extended as far as possible.
|
||||
The number of array elements in
|
||||
.I match
|
||||
is given by
|
||||
.IR msize .
|
||||
The structure of elements of
|
||||
.I match
|
||||
is:
|
||||
.IP
|
||||
.EX
|
||||
typedef struct {
|
||||
union {
|
||||
char *sp;
|
||||
Rune *rsp;
|
||||
};
|
||||
union {
|
||||
char *ep;
|
||||
Rune *rep;
|
||||
};
|
||||
} Resub;
|
||||
.EE
|
||||
.LP
|
||||
If
|
||||
.B match[0].sp
|
||||
is nonzero on entry,
|
||||
.I regexec
|
||||
starts matching at that point within
|
||||
.IR string .
|
||||
If
|
||||
.B match[0].ep
|
||||
is nonzero on entry,
|
||||
the last character matched is the one
|
||||
preceding that point.
|
||||
.PP
|
||||
.I Regsub
|
||||
places in
|
||||
.I dest
|
||||
a substitution instance of
|
||||
.I source
|
||||
in the context of the last
|
||||
.I regexec
|
||||
performed using
|
||||
.IR match .
|
||||
Each instance of
|
||||
.BI \e n\f1,
|
||||
where
|
||||
.I n
|
||||
is a digit, is replaced by the
|
||||
string delimited by
|
||||
.BI match[ n ].sp
|
||||
and
|
||||
.BI match[ n ].ep\f1.
|
||||
Each instance of
|
||||
.L &
|
||||
is replaced by the string delimited by
|
||||
.B match[0].sp
|
||||
and
|
||||
.BR match[0].ep .
|
||||
The substitution will always be null terminated and
|
||||
trimmed to fit into dlen bytes.
|
||||
.PP
|
||||
.IR Regerror ,
|
||||
called whenever an error is detected in
|
||||
.IR regcomp ,
|
||||
writes the string
|
||||
.I msg
|
||||
on the standard error file and exits.
|
||||
.I Regerror
|
||||
can be replaced to perform
|
||||
special error processing.
|
||||
If the user supplied
|
||||
.I regerror
|
||||
returns rather than exits,
|
||||
.I regcomp
|
||||
will return 0.
|
||||
.PP
|
||||
.I Rregexec
|
||||
and
|
||||
.I rregsub
|
||||
are variants of
|
||||
.I regexec
|
||||
and
|
||||
.I regsub
|
||||
that use strings of
|
||||
.B Runes
|
||||
instead of strings of
|
||||
.BR chars .
|
||||
With these routines, the
|
||||
.I rsp
|
||||
and
|
||||
.I rep
|
||||
fields of the
|
||||
.I match
|
||||
array elements should be used.
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH "SEE ALSO"
|
||||
.IR grep (1)
|
||||
.SH DIAGNOSTICS
|
||||
.I Regcomp
|
||||
returns
|
||||
.B 0
|
||||
for an illegal expression
|
||||
or other failure.
|
||||
.I Regexec
|
||||
returns 0
|
||||
if
|
||||
.I string
|
||||
is not matched.
|
||||
.SH BUGS
|
||||
There is no way to specify or match a NUL character; NULs terminate patterns and strings.
|
||||
133
unix/man/regexp9.7
Normal file
133
unix/man/regexp9.7
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
.TH REGEXP9 7
|
||||
.SH NAME
|
||||
regexp9 \- Plan 9 regular expression notation
|
||||
.SH DESCRIPTION
|
||||
This manual page describes the regular expression
|
||||
syntax used by the Plan 9 regular expression library
|
||||
.IR regexp9 (3).
|
||||
It is the form used by
|
||||
.IR egrep (1)
|
||||
before
|
||||
.I egrep
|
||||
got complicated.
|
||||
.PP
|
||||
A
|
||||
.I "regular expression"
|
||||
specifies
|
||||
a set of strings of characters.
|
||||
A member of this set of strings is said to be
|
||||
.I matched
|
||||
by the regular expression. In many applications
|
||||
a delimiter character, commonly
|
||||
.LR / ,
|
||||
bounds a regular expression.
|
||||
In the following specification for regular expressions
|
||||
the word `character' means any character (rune) but newline.
|
||||
.PP
|
||||
The syntax for a regular expression
|
||||
.B e0
|
||||
is
|
||||
.IP
|
||||
.EX
|
||||
e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')'
|
||||
|
||||
e2: e3
|
||||
| e2 REP
|
||||
|
||||
REP: '*' | '+' | '?'
|
||||
|
||||
e1: e2
|
||||
| e1 e2
|
||||
|
||||
e0: e1
|
||||
| e0 '|' e1
|
||||
.EE
|
||||
.PP
|
||||
A
|
||||
.B literal
|
||||
is any non-metacharacter, or a metacharacter
|
||||
(one of
|
||||
.BR .*+?[]()|\e^$ ),
|
||||
or the delimiter
|
||||
preceded by
|
||||
.LR \e .
|
||||
.PP
|
||||
A
|
||||
.B charclass
|
||||
is a nonempty string
|
||||
.I s
|
||||
bracketed
|
||||
.BI [ \|s\| ]
|
||||
(or
|
||||
.BI [^ s\| ]\fR);
|
||||
it matches any character in (or not in)
|
||||
.IR s .
|
||||
A negated character class never
|
||||
matches newline.
|
||||
A substring
|
||||
.IB a - b\f1,
|
||||
with
|
||||
.I a
|
||||
and
|
||||
.I b
|
||||
in ascending
|
||||
order, stands for the inclusive
|
||||
range of
|
||||
characters between
|
||||
.I a
|
||||
and
|
||||
.IR b .
|
||||
In
|
||||
.IR s ,
|
||||
the metacharacters
|
||||
.LR - ,
|
||||
.LR ] ,
|
||||
an initial
|
||||
.LR ^ ,
|
||||
and the regular expression delimiter
|
||||
must be preceded by a
|
||||
.LR \e ;
|
||||
other metacharacters
|
||||
have no special meaning and
|
||||
may appear unescaped.
|
||||
.PP
|
||||
A
|
||||
.L .
|
||||
matches any character.
|
||||
.PP
|
||||
A
|
||||
.L ^
|
||||
matches the beginning of a line;
|
||||
.L $
|
||||
matches the end of the line.
|
||||
.PP
|
||||
The
|
||||
.B REP
|
||||
operators match zero or more
|
||||
.RB ( * ),
|
||||
one or more
|
||||
.RB ( + ),
|
||||
zero or one
|
||||
.RB ( ? ),
|
||||
instances respectively of the preceding regular expression
|
||||
.BR e2 .
|
||||
.PP
|
||||
A concatenated regular expression,
|
||||
.BR "e1\|e2" ,
|
||||
matches a match to
|
||||
.B e1
|
||||
followed by a match to
|
||||
.BR e2 .
|
||||
.PP
|
||||
An alternative regular expression,
|
||||
.BR "e0\||\|e1" ,
|
||||
matches either a match to
|
||||
.B e0
|
||||
or a match to
|
||||
.BR e1 .
|
||||
.PP
|
||||
A match to any part of a regular expression
|
||||
extends as far as possible without preventing
|
||||
a match to the remainder of the regular expression.
|
||||
.SH "SEE ALSO"
|
||||
.IR regexp9 (3)
|
||||
186
unix/man/rune.3
Normal file
186
unix/man/rune.3
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
.TH RUNE 3
|
||||
.SH NAME
|
||||
runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
|
||||
.SH SYNOPSIS
|
||||
.ta \w'\fLchar*xx'u
|
||||
.B #include <utf.h>
|
||||
.PP
|
||||
.B
|
||||
int runetochar(char *s, Rune *r)
|
||||
.PP
|
||||
.B
|
||||
int chartorune(Rune *r, char *s)
|
||||
.PP
|
||||
.B
|
||||
int runelen(long r)
|
||||
.PP
|
||||
.B
|
||||
int runenlen(Rune *r, int n)
|
||||
.PP
|
||||
.B
|
||||
int fullrune(char *s, int n)
|
||||
.PP
|
||||
.B
|
||||
char* utfecpy(char *s1, char *es1, char *s2)
|
||||
.PP
|
||||
.B
|
||||
int utflen(char *s)
|
||||
.PP
|
||||
.B
|
||||
int utfnlen(char *s, long n)
|
||||
.PP
|
||||
.B
|
||||
char* utfrune(char *s, long c)
|
||||
.PP
|
||||
.B
|
||||
char* utfrrune(char *s, long c)
|
||||
.PP
|
||||
.B
|
||||
char* utfutf(char *s1, char *s2)
|
||||
.SH DESCRIPTION
|
||||
These routines convert to and from a
|
||||
.SM UTF
|
||||
byte stream and runes.
|
||||
.PP
|
||||
.I Runetochar
|
||||
copies one rune at
|
||||
.I r
|
||||
to at most
|
||||
.B UTFmax
|
||||
bytes starting at
|
||||
.I s
|
||||
and returns the number of bytes copied.
|
||||
.BR UTFmax ,
|
||||
defined as
|
||||
.B 3
|
||||
in
|
||||
.BR <utf.h> ,
|
||||
is the maximum number of bytes required to represent a rune.
|
||||
.PP
|
||||
.I Chartorune
|
||||
copies at most
|
||||
.B UTFmax
|
||||
bytes starting at
|
||||
.I s
|
||||
to one rune at
|
||||
.I r
|
||||
and returns the number of bytes copied.
|
||||
If the input is not exactly in
|
||||
.SM UTF
|
||||
format,
|
||||
.I chartorune
|
||||
will convert to 0x80 and return 1.
|
||||
.PP
|
||||
.I Runelen
|
||||
returns the number of bytes
|
||||
required to convert
|
||||
.I r
|
||||
into
|
||||
.SM UTF.
|
||||
.PP
|
||||
.I Runenlen
|
||||
returns the number of bytes
|
||||
required to convert the
|
||||
.I n
|
||||
runes pointed to by
|
||||
.I r
|
||||
into
|
||||
.SM UTF.
|
||||
.PP
|
||||
.I Fullrune
|
||||
returns 1 if the string
|
||||
.I s
|
||||
of length
|
||||
.I n
|
||||
is long enough to be decoded by
|
||||
.I chartorune
|
||||
and 0 otherwise.
|
||||
This does not guarantee that the string
|
||||
contains a legal
|
||||
.SM UTF
|
||||
encoding.
|
||||
This routine is used by programs that
|
||||
obtain input a byte at
|
||||
a time and need to know when a full rune
|
||||
has arrived.
|
||||
.PP
|
||||
The following routines are analogous to the
|
||||
corresponding string routines with
|
||||
.B utf
|
||||
substituted for
|
||||
.B str
|
||||
and
|
||||
.B rune
|
||||
substituted for
|
||||
.BR chr .
|
||||
.PP
|
||||
.I Utfecpy
|
||||
copies UTF sequences until a null sequence has been copied, but writes no
|
||||
sequences beyond
|
||||
.IR es1 .
|
||||
If any sequences are copied,
|
||||
.I s1
|
||||
is terminated by a null sequence, and a pointer to that sequence is returned.
|
||||
Otherwise, the original
|
||||
.I s1
|
||||
is returned.
|
||||
.PP
|
||||
.I Utflen
|
||||
returns the number of runes that
|
||||
are represented by the
|
||||
.SM UTF
|
||||
string
|
||||
.IR s .
|
||||
.PP
|
||||
.I Utfnlen
|
||||
returns the number of complete runes that
|
||||
are represented by the first
|
||||
.I n
|
||||
bytes of
|
||||
.SM UTF
|
||||
string
|
||||
.IR s .
|
||||
If the last few bytes of the string contain an incompletely coded rune,
|
||||
.I utfnlen
|
||||
will not count them; in this way, it differs from
|
||||
.IR utflen ,
|
||||
which includes every byte of the string.
|
||||
.PP
|
||||
.I Utfrune
|
||||
.RI ( utfrrune )
|
||||
returns a pointer to the first (last)
|
||||
occurrence of rune
|
||||
.I c
|
||||
in the
|
||||
.SM UTF
|
||||
string
|
||||
.IR s ,
|
||||
or 0 if
|
||||
.I c
|
||||
does not occur in the string.
|
||||
The NUL byte terminating a string is considered to
|
||||
be part of the string
|
||||
.IR s .
|
||||
.PP
|
||||
.I Utfutf
|
||||
returns a pointer to the first occurrence of
|
||||
the
|
||||
.SM UTF
|
||||
string
|
||||
.I s2
|
||||
as a
|
||||
.SM UTF
|
||||
substring of
|
||||
.IR s1 ,
|
||||
or 0 if there is none.
|
||||
If
|
||||
.I s2
|
||||
is the null string,
|
||||
.I utfutf
|
||||
returns
|
||||
.IR s1 .
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR utf (7),
|
||||
.IR tcs (1)
|
||||
66
unix/man/runestrcat.3
Normal file
66
unix/man/runestrcat.3
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
.TH RUNESTRCAT 3
|
||||
.SH NAME
|
||||
runestrcat,
|
||||
runestrncat,
|
||||
runestrcmp,
|
||||
runestrncmp,
|
||||
runestrcpy,
|
||||
runestrncpy,
|
||||
runestrecpy,
|
||||
runestrlen,
|
||||
runestrchr,
|
||||
runestrrchr,
|
||||
runestrdup,
|
||||
runestrstr \- rune string operations
|
||||
.SH SYNOPSIS
|
||||
.B #include <u.h>
|
||||
.br
|
||||
.B #include <libc.h>
|
||||
.PP
|
||||
.ta \w'\fLRune* \fP'u
|
||||
.B
|
||||
Rune* runestrcat(Rune *s1, Rune *s2)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrncat(Rune *s1, Rune *s2, long n)
|
||||
.PP
|
||||
.B
|
||||
int runestrcmp(Rune *s1, Rune *s2)
|
||||
.PP
|
||||
.B
|
||||
int runestrncmp(Rune *s1, Rune *s2, long n)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrcpy(Rune *s1, Rune *s2)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrncpy(Rune *s1, Rune *s2, long n)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2)
|
||||
.PP
|
||||
.B
|
||||
long runestrlen(Rune *s)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrchr(Rune *s, Rune c)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrrchr(Rune *s, Rune c)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrdup(Rune *s)
|
||||
.PP
|
||||
.B
|
||||
Rune* runestrstr(Rune *s1, Rune *s2)
|
||||
.SH DESCRIPTION
|
||||
These functions are rune string analogues of
|
||||
the corresponding functions in
|
||||
.IR strcat (3).
|
||||
.SH SOURCE
|
||||
.B http://swtch.com/plan9port/unix
|
||||
.SH SEE ALSO
|
||||
.IR rune (3),
|
||||
.IR strcat (3)
|
||||
.SH BUGS
|
||||
The outcome of overlapping moves varies among implementations.
|
||||
91
unix/man/utf.7
Normal file
91
unix/man/utf.7
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
.TH UTF 7
|
||||
.SH NAME
|
||||
UTF, Unicode, ASCII, rune \- character set and format
|
||||
.SH DESCRIPTION
|
||||
The Plan 9 character set and representation are
|
||||
based on the Unicode Standard and on the ISO multibyte
|
||||
.SM UTF-8
|
||||
encoding (Universal Character
|
||||
Set Transformation Format, 8 bits wide).
|
||||
The Unicode Standard represents its characters in 16
|
||||
bits;
|
||||
.SM UTF-8
|
||||
represents such
|
||||
values in an 8-bit byte stream.
|
||||
Throughout this manual,
|
||||
.SM UTF-8
|
||||
is shortened to
|
||||
.SM UTF.
|
||||
.PP
|
||||
In Plan 9, a
|
||||
.I rune
|
||||
is a 16-bit quantity representing a Unicode character.
|
||||
Internally, programs may store characters as runes.
|
||||
However, any external manifestation of textual information,
|
||||
in files or at the interface between programs, uses a
|
||||
machine-independent, byte-stream encoding called
|
||||
.SM UTF.
|
||||
.PP
|
||||
.SM UTF
|
||||
is designed so the 7-bit
|
||||
.SM ASCII
|
||||
characters (values hexadecimal 00 to 7F)
|
||||
appear only as themselves
|
||||
in the encoding.
|
||||
Runes with values above 7F appear as sequences of two or more
|
||||
bytes with values only from 80 to FF.
|
||||
.PP
|
||||
The
|
||||
.SM UTF
|
||||
encoding of the Unicode Standard is backward compatible with
|
||||
.SM ASCII\c
|
||||
:
|
||||
programs presented only with
|
||||
.SM ASCII
|
||||
work on Plan 9
|
||||
even if not written to deal with
|
||||
.SM UTF,
|
||||
as do
|
||||
programs that deal with uninterpreted byte streams.
|
||||
However, programs that perform semantic processing on
|
||||
.SM ASCII
|
||||
graphic
|
||||
characters must convert from
|
||||
.SM UTF
|
||||
to runes
|
||||
in order to work properly with non-\c
|
||||
.SM ASCII
|
||||
input.
|
||||
See
|
||||
.IR rune (3).
|
||||
.PP
|
||||
Letting numbers be binary,
|
||||
a rune x is converted to a multibyte
|
||||
.SM UTF
|
||||
sequence
|
||||
as follows:
|
||||
.PP
|
||||
01. x in [00000000.0bbbbbbb] → 0bbbbbbb
|
||||
.br
|
||||
10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb
|
||||
.br
|
||||
11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb
|
||||
.br
|
||||
.PP
|
||||
Conversion 01 provides a one-byte sequence that spans the
|
||||
.SM ASCII
|
||||
character set in a compatible way.
|
||||
Conversions 10 and 11 represent higher-valued characters
|
||||
as sequences of two or three bytes with the high bit set.
|
||||
Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X/Open.
|
||||
When there are multiple ways to encode a value, for example rune 0,
|
||||
the shortest encoding is used.
|
||||
.PP
|
||||
In the inverse mapping,
|
||||
any sequence except those described above
|
||||
is incorrect and is converted to rune hexadecimal 0080.
|
||||
.SH "SEE ALSO"
|
||||
.IR ascii (1),
|
||||
.IR tcs (1),
|
||||
.IR rune (3),
|
||||
.IR "The Unicode Standard" .
|
||||
Loading…
Add table
Add a link
Reference in a new issue