checkpoint

2005-01-14 03:45:44 +00:00 · 2005-01-14 03:45:44 +00:00 · 78e51a8c66
commit 78e51a8c66
parent 2634795b5f
314 changed files with 48199 additions and 300 deletions
--- a/unix/man/.cvsignore
+++ b/unix/man/.cvsignore
@ -0,0 +1 @@
+bio3.html fmtinstall3.html fmtstrtod3.html index.html isalpharune3.html mk1.html print3.html quote3.html regexp93.html regexp97.html rune3.html runestrcat3.html utf7.html
--- a/unix/man/bio.3
+++ b/unix/man/bio.3
@ -0,0 +1,363 @@
+.TH BIO 3
+.SH NAME
+Bopen, Bfdopen, Binit, Binits, Brdline, Brdstr, Bgetc, Bgetrune, Bgetd, Bungetc, Bungetrune, Bread, Bseek, Boffset, Bfildes, Blinelen, Bputc, Bputrune, Bprint, Bvprint, Bwrite, Bflush, Bterm, Bbuffered \- buffered input/output
+.SH SYNOPSIS
+.ta \w'\fLBiobuf* 'u
+.B #include <utf.h>
+.br
+.B #include <fmt.h>
+.br
+.B #include <bio.h>
+.PP
+.B
+Biobuf*	Bopen(char *file, int mode)
+.PP
+.B
+Biobuf*	Bfdopen(int fd, int mode)
+.PP
+.B
+int	Binit(Biobuf *bp, int fd, int mode)
+.PP
+.B
+int	Binits(Biobufhdr *bp, int fd, int mode, uchar *buf, int size)
+.PP
+.B
+int	Bterm(Biobufhdr *bp)
+.PP
+.B
+int	Bprint(Biobufhdr *bp, char *format, ...)
+.PP
+.B
+int	Bvprint(Biobufhdr *bp, char *format, va_list arglist);
+.PP
+.B
+void*	Brdline(Biobufhdr *bp, int delim)
+.PP
+.B
+char*	Brdstr(Biobufhdr *bp, int delim, int nulldelim)
+.PP
+.B
+int	Blinelen(Biobufhdr *bp)
+.PP
+.B
+vlong	Boffset(Biobufhdr *bp)
+.PP
+.B
+int	Bfildes(Biobufhdr *bp)
+.PP
+.B
+int	Bgetc(Biobufhdr *bp)
+.PP
+.B
+long	Bgetrune(Biobufhdr *bp)
+.PP
+.B
+int	Bgetd(Biobufhdr *bp, double *d)
+.PP
+.B
+int	Bungetc(Biobufhdr *bp)
+.PP
+.B
+int	Bungetrune(Biobufhdr *bp)
+.PP
+.B
+vlong	Bseek(Biobufhdr *bp, vlong n, int type)
+.PP
+.B
+int	Bputc(Biobufhdr *bp, int c)
+.PP
+.B
+int	Bputrune(Biobufhdr *bp, long c)
+.PP
+.B
+long	Bread(Biobufhdr *bp, void *addr, long nbytes)
+.PP
+.B
+long	Bwrite(Biobufhdr *bp, void *addr, long nbytes)
+.PP
+.B
+int	Bflush(Biobufhdr *bp)
+.PP
+.B
+int	Bbuffered(Biobufhdr *bp)
+.PP
+.SH DESCRIPTION
+These routines implement fast buffered I/O.
+I/O on different file descriptors is independent.
+.PP
+.I Bopen
+opens
+.I file
+for mode
+.B O_RDONLY
+or creates for mode
+.BR O_WRONLY .
+It calls
+.IR malloc (3)
+to allocate a buffer.
+.PP
+.I Bfdopen
+allocates a buffer for the already-open file descriptor
+.I fd
+for mode
+.B O_RDONLY
+or
+.BR O_WRONLY .
+It calls
+.IR malloc (3)
+to allocate a buffer.
+.PP
+.I Binit
+initializes a standard size buffer, type
+.IR Biobuf ,
+with the open file descriptor passed in
+by the user.
+.I Binits
+initializes a non-standard size buffer, type
+.IR Biobufhdr ,
+with the open file descriptor,
+buffer area, and buffer size passed in
+by the user.
+.I Biobuf
+and
+.I Biobufhdr
+are related by the declaration:
+.IP
+.EX
+typedef struct Biobuf Biobuf;
+struct Biobuf
+{
+	Biobufhdr;
+	uchar b[Bungetsize+Bsize];
+};
+.EE
+.PP
+Arguments
+of types pointer to Biobuf and pointer to Biobufhdr
+can be used interchangeably in the following routines.
+.PP
+.IR Bopen ,
+.IR Binit ,
+or
+.I Binits
+should be called before any of the
+other routines on that buffer.
+.I Bfildes
+returns the integer file descriptor of the associated open file.
+.PP
+.I Bterm
+flushes the buffer for
+.IR bp .
+If the buffer was allocated by
+.IR Bopen ,
+the buffer is
+.I freed
+and the file is closed.
+.PP
+.I Brdline
+reads a string from the file associated with
+.I bp
+up to and including the first
+.I delim
+character.
+The delimiter character at the end of the line is
+not altered.
+.I Brdline
+returns a pointer to the start of the line or
+.L 0
+on end-of-file or read error.
+.I Blinelen
+returns the length (including the delimiter)
+of the most recent string returned by
+.IR Brdline .
+.PP
+.I Brdstr
+returns a
+.IR malloc (3)-allocated
+buffer containing the next line of input delimited by
+.IR delim ,
+terminated by a NUL (0) byte.
+Unlike
+.IR Brdline ,
+which returns when its buffer is full even if no delimiter has been found,
+.I Brdstr
+will return an arbitrarily long line in a single call.
+If
+.I nulldelim
+is set, the terminal delimiter will be overwritten with a NUL.
+After a successful call to
+.IR Brdstr ,
+the return value of
+.I Blinelen
+will be the length of the returned buffer, excluding the NUL.
+.PP
+.I Bgetc
+returns the next character from
+.IR bp ,
+or a negative value
+at end of file.
+.I Bungetc
+may be called immediately after
+.I Bgetc
+to allow the same character to be reread.
+.PP
+.I Bgetrune
+calls
+.I Bgetc
+to read the bytes of the next
+.SM UTF
+sequence in the input stream and returns the value of the rune
+represented by the sequence.
+It returns a negative value
+at end of file.
+.I Bungetrune
+may be called immediately after
+.I Bgetrune
+to allow the same
+.SM UTF
+sequence to be reread as either bytes or a rune.
+.I Bungetc
+and
+.I Bungetrune
+may back up a maximum of five bytes.
+.PP
+.I Bgetd
+uses
+.I fmtcharstod
+(see
+.IR fmtstrtod (3))
+and
+.I Bgetc
+to read the formatted
+floating-point number in the input stream,
+skipping initial blanks and tabs.
+The value is stored in
+.BR *d .
+.PP
+.I Bread
+reads
+.I nbytes
+of data from
+.I bp
+into memory starting at
+.IR addr .
+The number of bytes read is returned on success
+and a negative value is returned if a read error occurred.
+.PP
+.I Bseek
+applies
+.IR lseek (2)
+to
+.IR bp .
+It returns the new file offset.
+.I Boffset
+returns the file offset of the next character to be processed.
+.PP
+.I Bputc
+outputs the low order 8 bits of
+.I c
+on
+.IR bp .
+If this causes a
+.IR write
+to occur and there is an error,
+a negative value is returned.
+Otherwise, a zero is returned.
+.PP
+.I Bputrune
+calls
+.I Bputc
+to output the low order
+16 bits of
+.I c
+as a rune
+in
+.SM UTF
+format
+on the output stream.
+.PP
+.I Bprint
+is a buffered interface to
+.IR print (3).
+If this causes a
+.IR write
+to occur and there is an error,
+a negative value
+.RB ( Beof )
+is returned.
+Otherwise, the number of bytes output is returned.
+.I Bvprint
+does the same except it takes as argument a
+.B va_list
+parameter, so it can be called within a variadic function.
+.PP
+.I Bwrite
+outputs
+.I nbytes
+of data starting at
+.I addr
+to
+.IR bp .
+If this causes a
+.IR write
+to occur and there is an error,
+a negative value is returned.
+Otherwise, the number of bytes written is returned.
+.PP
+.I Bflush
+causes any buffered output associated with
+.I bp
+to be written.
+The return is as for
+.IR Bputc .
+.I Bflush
+is called on
+exit for every buffer still open
+for writing.
+.PP
+.I Bbuffered
+returns the number of bytes in the buffer.
+When reading, this is the number of bytes still available from the last
+read on the file; when writing, it is the number of bytes ready to be
+written.
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR open (2),
+.IR print (3),
+.IR atexit (3),
+.IR utf (7)
+.SH DIAGNOSTICS
+.I Bio
+routines that return integers yield
+.B Beof
+if 
+.I bp
+is not the descriptor of an open file.
+.I Bopen
+returns zero if the file cannot be opened in the given mode.
+All routines set
+.I errstr
+on error.
+.SH BUGS
+.I Brdline
+returns an error on strings longer than the buffer associated
+with the file
+and also if the end-of-file is encountered
+before a delimiter.
+.I Blinelen
+will tell how many characters are available
+in these cases.
+In the case of a true end-of-file,
+.I Blinelen
+will return zero.
+At the cost of allocating a buffer,
+.I Brdstr
+sidesteps these issues.
+.PP
+The data returned by
+.I Brdline
+may be overwritten by calls to any other
+.I bio
+routine on the same
+.IR bp .
--- a/unix/man/ex.man
+++ b/unix/man/ex.man
@ -0,0 +1,8 @@
+.deEX
+.ift .ft5
+.nf
+..
+.deEE
+.ft1
+.fi
+..
--- a/unix/man/fixurls
+++ b/unix/man/fixurls
@ -0,0 +1,34 @@
+#!/usr/bin/perl
+
+open(OMIT, "9 sed -n 's/.*Omitman\\[\"(.*)\\((.)\\)\".*/\\1 \\2/p' /usr/local/plan9/dist/checkman.awk |") || die "omit: $!";	# read a pipe: sed prints name and section for each Omitman entry found in checkman.awk
+@omit = <OMIT>;	# one name/section pair per line
+close OMIT;
+chomp @omit;	# drop the trailing newlines
+push @omit, "grep 1", "lseek 2", "tcs 1", "sed 1", "rc 1", "strcat 3", "yacc 1";	# extra hard-coded pages appended to the omit list
+
+sub noref {	# remove hyperlinks to one man page from the global $text, keeping the link text
+	my ($p, $s) = @_;	# $p is the page name, $s is the section
+
+	$text =~ s!<a href="../man$s/$p.html">(([^<]|<[^/]|</[^a])*)</a>!\1!g;	# replace each matching anchor element by its inner text
+}
+
+for($i=0; $i<@ARGV; $i++){	# process every file named on the command line, rewriting it in place
+	open(IN, $ARGV[$i]) || die "open $ARGV[$i]: $!";
+	@text = <IN>;
+	close IN;
+	$text = join("", @text);	# whole file as one string; noref edits this global
+
+	foreach $o (@omit) {
+		$o =~ /(.*) (.*)/;	# split the entry into name and section
+		noref($1, $2);
+	}
+
+	$text =~ s!../man(.)/([^.]*)\.html!$2$1.html!g;	# rewrite remaining links from ../manS/NAME.html to NAMES.html
+	$text =~ s!(http://swtch.com/plan9port/unix)!<a href="\1">\1</a>!g;	# wrap the source URL in an anchor
+
+	open(OUT, ">$ARGV[$i]") || die "open $ARGV[$i]: $!";	# overwrite the same file that was read
+	print OUT $text;
+	close OUT;
+}
+
+exit 0;
--- a/unix/man/fmtinstall.3
+++ b/unix/man/fmtinstall.3
@ -0,0 +1,371 @@
+.TH FMTINSTALL 3
+.SH NAME
+fmtinstall, dofmt, dorfmt, fmtprint, fmtvprint, fmtrune, fmtstrcpy, fmtrunestrcpy, fmtfdinit, fmtfdflush, fmtstrinit, fmtstrflush, runefmtstrinit, runefmtstrflush, errfmt \- support for user-defined print formats and output routines
+.SH SYNOPSIS
+.B #include <utf.h>
+.br
+.B #include <fmt.h>
+.PP
+.ft L
+.nf
+.ta \w'    'u +\w'    'u +\w'    'u +\w'    'u +\w'    'u
+typedef struct Fmt	Fmt;
+struct Fmt{
+	uchar	runes;		/* output buffer is runes or chars? */
+	void	*start;		/* of buffer */
+	void	*to;		/* current place in the buffer */
+	void	*stop;		/* end of the buffer; overwritten if flush fails */
+	int		(*flush)(Fmt*);	/* called when to == stop */
+	void	*farg;		/* to make flush a closure */
+	int		nfmt;		/* num chars formatted so far */
+	va_list	args;		/* args passed to dofmt */
+	int		r;			/* % format Rune */
+	int		width;
+	int		prec;
+	ulong	flags;
+};
+
+enum{
+	FmtWidth	= 1,
+	FmtLeft		= FmtWidth << 1,
+	FmtPrec		= FmtLeft << 1,
+	FmtSharp	= FmtPrec << 1,
+	FmtSpace	= FmtSharp << 1,
+	FmtSign		= FmtSpace << 1,
+	FmtZero		= FmtSign << 1,
+	FmtUnsigned	= FmtZero << 1,
+	FmtShort	= FmtUnsigned << 1,
+	FmtLong		= FmtShort << 1,
+	FmtVLong	= FmtLong << 1,
+	FmtComma	= FmtVLong << 1,
+
+	FmtFlag		= FmtComma << 1
+};
+.fi
+.PP
+.B
+.ta \w'\fLchar* 'u
+
+.PP
+.B
+int	fmtfdinit(Fmt *f, int fd, char *buf, int nbuf);
+.PP
+.B
+int	fmtfdflush(Fmt *f);
+.PP
+.B
+int	fmtstrinit(Fmt *f);
+.PP
+.B
+char*	fmtstrflush(Fmt *f);
+.PP
+.B
+int	runefmtstrinit(Fmt *f);
+.PP
+.B
+Rune*	runefmtstrflush(Fmt *f);
+
+.PP
+.B
+int	fmtinstall(int c, int (*fn)(Fmt*));
+.PP
+.B
+int	dofmt(Fmt *f, char *fmt);
+.PP
+.B
+int	dorfmt(Fmt*, Rune *fmt);
+.PP
+.B
+int	fmtprint(Fmt *f, char *fmt, ...);
+.PP
+.B
+int	fmtvprint(Fmt *f, char *fmt, va_list v);
+.PP
+.B
+int	fmtrune(Fmt *f, int r);
+.PP
+.B
+int	fmtstrcpy(Fmt *f, char *s);
+.PP
+.B
+int	fmtrunestrcpy(Fmt *f, Rune *s);
+.PP
+.B
+int	errfmt(Fmt *f);
+.SH DESCRIPTION
+The interface described here allows the construction of custom
+.IR print (3)
+verbs and output routines.
+In essence, they provide access to the workings of the formatted print code.
+.PP
+The
+.IR print (3)
+suite maintains its state with a data structure called
+.BR Fmt .
+A typical call to
+.IR print (3)
+or its relatives initializes a
+.B Fmt
+structure, passes it to subsidiary routines to process the output,
+and finishes by emitting any saved state recorded in the
+.BR Fmt .
+The details of the
+.B Fmt
+are unimportant to outside users, except insofar as the general
+design influences the interface.
+The
+.B Fmt
+records whether the output is in runes or bytes,
+the verb being processed, its precision and width,
+and buffering parameters.
+Most important, it also records a
+.I flush
+routine that the library will call if a buffer overflows.
+When printing to a file descriptor, the flush routine will
+emit saved characters and reset the buffer; when printing
+to an allocated string, it will resize the string to receive more output.
+The flush routine is nil when printing to fixed-size buffers.
+User code need never provide a flush routine; this is done internally
+by the library.
+.SS Custom output routines
+To write a custom output routine, such as an error handler that
+formats and prints custom error messages, the output sequence can be run
+from outside the library using the routines described here.
+There are two main cases: output to an open file descriptor
+and output to a string.
+.PP
+To write to a file descriptor, call
+.I fmtfdinit
+to initialize the local
+.B Fmt
+structure
+.IR f ,
+giving the file descriptor
+.IR fd ,
+the buffer
+.IR buf ,
+and its size
+.IR nbuf .
+Then call
+.IR fmtprint
+or
+.IR fmtvprint
+to generate the output.
+These behave like
+.B fprint
+(see
+.IR print (3))
+or
+.B vfprint
+except that the characters are buffered until
+.I fmtfdflush
+is called and the return value is either 0 or \-1.
+A typical example of this sequence appears in the Examples section.
+.PP
+The same basic sequence applies when outputting to an allocated string:
+call
+.I fmtstrinit
+to initialize the
+.BR Fmt ,
+then call
+.I fmtprint
+and
+.I fmtvprint
+to generate the output.
+Finally,
+.I fmtstrflush
+will return the allocated string, which should be freed after use.
+To output to a rune string, use
+.I runefmtstrinit
+and
+.IR runefmtstrflush .
+Regardless of the output style or type,
+.I fmtprint
+or
+.I fmtvprint
+generates the characters.
+.SS Custom format verbs
+.I Fmtinstall
+is used to install custom verbs and flags labeled by character
+.IR c ,
+which may be any non-zero Unicode character.
+.I Fn
+should be declared as
+.IP
+.EX
+int	fn(Fmt*)
+.EE
+.PP
+.IB Fp ->r
+is the flag or verb character to cause
+.I fn
+to be called.
+In
+.IR fn ,
+.IB fp ->width ,
+.IB fp ->prec
+are the width and precision, and
+.IB fp ->flags
+the decoded flags for the verb (see
+.IR print (3)
+for a description of these items).
+The standard flag values are:
+.B FmtSign
+.RB ( + ),
+.B FmtLeft
+.RB ( - ),
+.B FmtSpace
+.RB ( '\ ' ),
+.B FmtSharp
+.RB ( # ),
+.B FmtComma
+.RB ( , ),
+.B FmtLong
+.RB ( l ),
+.B FmtShort
+.RB ( h ),
+.B FmtUnsigned
+.RB ( u ),
+and
+.B FmtVLong
+.RB ( ll ).
+The flag bits
+.B FmtWidth
+and
+.B FmtPrec
+identify whether a width and precision were specified.
+.PP
+.I Fn
+is passed a pointer to the
+.B Fmt
+structure recording the state of the output.
+If
+.IB fp ->r
+is a verb (rather than a flag),
+.I fn
+should use 
+.B Fmt->args
+to fetch its argument from the list,
+then format it, and return zero.
+If
+.IB fp ->r
+is a flag,
+.I fn
+should return one.
+All interpretation of
+.IB fp ->width\f1,
+.IB fp ->prec\f1,
+and
+.IB fp ->flags
+is left up to the conversion routine.
+.I Fmtinstall
+returns 0 if the installation succeeds, \-1 if it fails.
+.PP
+.IR Fmtprint
+and
+.IR fmtvprint
+may be called to
+help prepare output in custom conversion routines.
+However, these functions clear the width, precision, and flags.
+Both functions return 0 for success and \-1 for failure.
+.PP
+The functions
+.I dofmt
+and
+.I dorfmt
+are the underlying formatters; they
+use the existing contents of
+.B Fmt
+and should be called only by sophisticated conversion routines.
+These routines return the number of characters (bytes of UTF or runes)
+produced.
+.PP
+Some internal functions may be useful to format primitive types.
+They honor the width, precision and flags as described in
+.IR print (3).
+.I Fmtrune
+formats a single character
+.BR r .
+.I Fmtstrcpy
+formats a string
+.BR s ;
+.I fmtrunestrcpy
+formats a rune string
+.BR s .
+.I Errfmt
+formats the system error string.
+All these routines return zero for successful execution.
+Conversion routines that call these functions will work properly
+regardless of whether the output is bytes or runes.
+.\" .PP
+.\" .IR 2c (1)
+.\" describes the C directive
+.\" .B #pragma
+.\" .B varargck
+.\" that can be used to provide type-checking for custom print verbs and output routines.
+.SH EXAMPLES
+This function prints an error message with a variable
+number of arguments and then quits.
+Compared to the corresponding example in
+.IR print (3),
+this version uses a smaller buffer, will never truncate
+the output message, but might generate multiple
+.B write
+system calls to produce its output.
+.IP
+.EX
+.ta 6n +6n +6n +6n +6n +6n +6n +6n +6n
+#pragma	varargck	argpos	fatal	1
+
+void fatal(char *fmt, ...)
+{
+	Fmt f;
+	char buf[64];
+	va_list arg;
+
+	fmtfdinit(&f, 1, buf, sizeof buf);
+	fmtprint(&f, "fatal: ");
+	va_start(arg, fmt);
+	fmtvprint(&f, fmt, arg);
+	va_end(arg);
+	fmtprint(&f, "\en");
+	fmtfdflush(&f);
+	exits("fatal error");
+}
+.EE
+.PP
+This example adds a verb to print complex numbers.
+.IP
+.EX
+typedef
+struct {
+	double	r, i;
+} Complex;
+
+#pragma	varargck	type	"X"	Complex
+
+int
+Xfmt(Fmt *f)
+{
+	Complex c;
+
+	c = va_arg(f->args, Complex);
+	return fmtprint(f, "(%g,%g)", c.r, c.i);
+}
+
+main(...)
+{
+	Complex x = (Complex){ 1.5, -2.3 };
+
+	fmtinstall('X', Xfmt);
+	print("x = %X\en", x);
+}
+.EE
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR print (3),
+.IR utf (7)
+.SH DIAGNOSTICS
+These routines return negative numbers or nil for errors and set
+.IR errstr .
--- a/unix/man/fmtstrtod.3
+++ b/unix/man/fmtstrtod.3
@ -0,0 +1,54 @@
+.TH FMTSTRTOD 3
+.SH NAME
+fmtstrtod, fmtcharstod \- convert text to numbers
+.SH SYNOPSIS
+.B #include <utf.h>
+.br
+.B #include <fmt.h>
+.PP
+.PP
+.B
+double	fmtstrtod(char *nptr, char **rptr)
+.PP
+.B
+double	fmtcharstod(int (*f)(void *), void *a)
+.SH DESCRIPTION
+.I Fmtstrtod
+converts a string pointed to by
+.I nptr
+to floating point representation and, if
+.I rptr
+is not zero, sets
+.I *rptr
+to point to the input character immediately after the string converted.
+.I Fmtstrtod
+recognizes an optional string of tabs and spaces,
+then an optional sign, then a string of digits optionally
+containing a decimal point, then an optional 
+.L e
+or
+.L E
+followed by an optionally signed integer.
+.PP
+.PP
+.I Fmtcharstod
+interprets floating point numbers in the manner of
+.IR atof ,
+but gets successive characters by calling
+.BR (*\fIf\fP)(a) .
+The last call to
+.I f
+terminates the scan, so it must have returned a character that
+is not a legal continuation of a number.
+Therefore, it may be necessary to back up the input stream one character
+after calling
+.IR fmtcharstod .
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR fscanf (3)
+.SH DIAGNOSTICS
+Zero is returned if the beginning of the input string is not interpretable
+as a number; even in this case,
+.I rptr
+will be updated.
--- a/unix/man/index.html
+++ b/unix/man/index.html
@ -0,0 +1,9 @@
+<html>
+<head>
+	<meta http-equiv="refresh" content="0; URL=..">
+	<title>you're lost!</title>
+</head>
+<body>
+Please go <a href="..">here</a>.
+</body>
+</html>
--- a/unix/man/isalpharune.3
+++ b/unix/man/isalpharune.3
@ -0,0 +1,49 @@
+.TH ISALPHARUNE 3
+.SH NAME
+isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases
+.SH SYNOPSIS
+.B #include <utf.h>
+.PP
+.B
+int isalpharune(Rune c)
+.PP
+.B
+int islowerrune(Rune c)
+.PP
+.B
+int isspacerune(Rune c)
+.PP
+.B
+int istitlerune(Rune c)
+.PP
+.B
+int isupperrune(Rune c)
+.PP
+.B
+Rune tolowerrune(Rune c)
+.PP
+.B
+Rune totitlerune(Rune c)
+.PP
+.B
+Rune toupperrune(Rune c)
+.SH DESCRIPTION
+These routines examine and operate on Unicode characters,
+in particular a subset of their properties as defined in the Unicode standard.
+Unicode defines some characters as alphabetic and specifies three cases:
+upper, lower, and title.
+Analogously to
+.IR isalpha (3)
+for
+.SM ASCII\c
+,
+these routines
+test types and modify cases for Unicode characters.
+The names are self-explanatory.
+.PP
+The case-conversion routines return the character unchanged if it has no case.
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR isalpha (3),
+.IR "The Unicode Standard" .
--- a/unix/man/mk.1
+++ b/unix/man/mk.1
@ -0,0 +1,684 @@
+.TH MK 1
+.SH NAME
+mk \- maintain (make) related files
+.SH SYNOPSIS
+.B mk
+[
+.B -f
+.I mkfile
+] ...
+[
+.I option ...
+]
+[
+.I target ...
+]
+.SH DESCRIPTION
+.I Mk
+uses the dependency rules specified in
+.I mkfile
+to control the update (usually by compilation) of
+.I targets
+(usually files)
+from the source files upon which they depend.
+The
+.I mkfile
+(default
+.LR mkfile )
+contains a
+.I rule
+for each target that identifies the files and other
+targets upon which it depends and an
+.IR sh (1)
+script, a
+.IR recipe ,
+to update the target.
+The script is run if the target does not exist
+or if it is older than any of the files it depends on.
+.I Mkfile
+may also contain
+.I meta-rules
+that define actions for updating implicit targets.
+If no
+.I target
+is specified, the target of the first rule (not meta-rule) in
+.I mkfile
+is updated.
+.PP
+The environment variable
+.B $NPROC
+determines how many targets may be updated simultaneously.
+Some operating systems, e.g., Plan 9, set
+.B $NPROC
+automatically to the number of CPUs on the current machine.
+.PP
+Options are:
+.TP \w'\fL-d[egp]\ 'u
+.B -a
+Assume all targets to be out of date.
+Thus, everything is updated.
+.PD 0
+.TP
+.BR -d [ egp ]
+Produce debugging output
+.RB ( p
+is for parsing,
+.B g
+for graph building,
+.B e
+for execution).
+.TP
+.B -e
+Explain why each target is made.
+.TP
+.B -i
+Force any missing intermediate targets to be made.
+.TP
+.B -k
+Do as much work as possible in the face of errors.
+.TP
+.B -n
+Print, but do not execute, the commands
+needed to update the targets.
+.TP
+.B -s
+Make the command line arguments sequentially rather than in parallel.
+.TP
+.B -t
+Touch (update the modified date of) file targets, without
+executing any recipes.
+.TP
+.BI -w target1 , target2,...
+Pretend the modify time for each
+.I target
+is the current time; useful in conjunction with
+.B -n
+to learn what updates would be triggered by
+modifying the
+.IR targets .
+.PD
+.SS The \fLmkfile\fP
+A
+.I mkfile
+consists of
+.I assignments
+(described under `Environment') and
+.IR rules .
+A rule contains
+.I targets
+and a
+.IR tail .
+A target is a literal string
+and is normally a file name.
+The tail contains zero or more 
+.I prerequisites
+and an optional
+.IR recipe ,
+which is a
+.B shell
+script.
+Each line of the recipe must begin with white space.
+A rule takes the form
+.IP
+.EX
+target: prereq1 prereq2
+        \f2recipe using\fP prereq1, prereq2 \f2to build\fP target
+.EE
+.PP
+When the recipe is executed,
+the first character on every line is elided.
+.PP
+After the colon on the target line, a rule may specify
+.IR attributes ,
+described below.
+.PP
+A
+.I meta-rule 
+has a target of the form
+.IB A % B
+where
+.I A
+and
+.I B
+are (possibly empty) strings.
+A meta-rule acts as a rule for any potential target whose
+name matches
+.IB A % B
+with
+.B %
+replaced by an arbitrary string, called the
+.IR stem .
+In interpreting a meta-rule,
+the stem is substituted for all occurrences of
+.B %
+in the prerequisite names.
+In the recipe of a meta-rule, the environment variable
+.B $stem
+contains the string matched by the
+.BR % .
+For example, a meta-rule to compile a C program
+might be:
+.IP
+.EX
+%:    %.c
+        cc -c $stem.c
+        ld -o $stem $stem.o
+.EE
+.PP
+Meta-rules may contain an ampersand
+.B &
+rather than a percent sign
+.BR % .
+A
+.B %
+matches a maximal length string of any characters;
+an
+.B &
+matches a maximal length string of any characters except period
+or slash.
+.PP
+The text of the
+.I mkfile
+is processed as follows.
+Lines beginning with
+.B <
+followed by a file name are replaced by the contents of the named
+file.
+Lines beginning with
+.B "<|"
+followed by a file name are replaced by the output
+of the execution of the named
+file.
+Blank lines and comments, which run from unquoted
+.B #
+characters to the following newline, are deleted.
+The character sequence backslash-newline is deleted,
+so long lines in
+.I mkfile
+may be folded.
+Non-recipe lines are processed by substituting for
+.BI `{ command }
+the output of the
+.I command
+when run by
+.IR sh .
+References to variables are replaced by the variables' values.
+Special characters may be quoted using single quotes
+.BR \&''
+as in
+.IR sh (1).
+.PP
+Assignments and rules are distinguished by
+the first unquoted occurrence of
+.B :
+(rule)
+or
+.B =
+(assignment).
+.PP
+A later rule may modify or override an existing rule under the
+following conditions:
+.TP
+\-
+If the targets of the rules exactly match and one rule
+contains only a prerequisite clause and no recipe, the
+clause is added to the prerequisites of the other rule.
+If either or both targets are virtual, the recipe is
+always executed.
+.TP
+\-
+If the targets of the rules match exactly and the
+prerequisites do not match and both rules
+contain recipes,
+.I mk
+reports an ``ambiguous recipe'' error.
+.TP
+\-
+If the target and prerequisites of both rules match exactly,
+the second rule overrides the first.
+.SS Environment
+Rules may make use of
+shell
+environment variables.
+A legal reference of the form
+.B $OBJ
+or
+.B ${name}
+is expanded as in
+.IR sh (1).
+A reference of the form
+.BI ${name: A % B = C\fL%\fID\fL}\fR,
+where
+.I A, B, C, D
+are (possibly empty) strings,
+has the value formed by expanding
+.B $name
+and substituting
+.I C
+for
+.I A
+and
+.I D
+for
+.I B
+in each word in
+.B $name
+that matches pattern
+.IB A % B\f1.
+.PP
+Variables can be set by
+assignments of the form
+.I
+        var\fL=\fR[\fIattr\fL=\fR]\fIvalue\fR
+.br
+Blanks in the
+.I value
+break it into words.
+Such variables are exported
+to the environment of
+recipes as they are executed, unless
+.BR U ,
+the only legal attribute
+.IR attr ,
+is present.
+The initial value of a variable is
+taken from (in increasing order of precedence)
+the default values below,
+.I mk's
+environment, the
+.IR mkfiles ,
+and any command line assignment as an argument to
+.IR mk .
+A variable assignment argument overrides the first (but not any subsequent)
+assignment to that variable.
+.PP
+The variable
+.B MKFLAGS
+contains all the option arguments (arguments starting with
+.L -
+or containing
+.LR = )
+and
+.B MKARGS
+contains all the targets in the call to
+.IR mk .
+.PP
+The variable
+.B MKSHELL
+contains the shell command line
+.I mk
+uses to run recipes.
+If the first word of the command ends in
+.B rc
+or
+.BR rcsh ,
+.I mk
+uses
+.IR rc (1)'s
+quoting rules; otherwise it uses
+.IR sh (1)'s.
+The
+.B MKSHELL
+variable is consulted when the mkfile is read, not when it is executed,
+so that different shells can be used within a single mkfile:
+.IP
+.EX
+MKSHELL=$PLAN9/bin/rc
+use-rc:V:
+	for(i in a b c) echo $i
+
+MKSHELL=sh
+use-sh:V:
+	for i in a b c; do echo $i; done
+.EE
+.LP
+Mkfiles included via
+.B <
+or
+.B <|
+.RI ( q.v. )
+see their own private copy of
+.BR MKSHELL ,
+which always starts set to
+.BR sh .
+.PP
+Dynamic information may be included in the mkfile by using a line of the form
+.IP
+\fR<|\fIcommand\fR \fIargs\fR
+.LP
+This runs the command 
+.I command
+with the given arguments
+.I args
+and pipes its standard output to
+.I mk
+to be included as part of the mkfile. For instance, the Inferno kernels
+use this technique
+to run a shell command with an awk script and a configuration
+file as arguments in order for
+the
+.I awk
+script to process the file and output a set of variables and their values.
+.SS Execution
+.PP
+During execution,
+.I mk
+determines which targets must be updated, and in what order,
+to build the
+.I names
+specified on the command line.
+It then runs the associated recipes.
+.PP
+A target is considered up to date if it has no prerequisites or
+if all its prerequisites are up to date and it is newer
+than all its prerequisites.
+Once the recipe for a target has executed, the target is
+considered up to date.
+.PP
+The date stamp
+used to determine if a target is up to date is computed
+differently for different types of targets.
+If a target is
+.I virtual
+(the target of a rule with the
+.B V
+attribute),
+its date stamp is initially zero; when the target is
+updated the date stamp is set to
+the most recent date stamp of its prerequisites.
+Otherwise, if a target does not exist as a file,
+its date stamp is set to the most recent date stamp of its prerequisites,
+or zero if it has no prerequisites.
+Otherwise, the target is the name of a file and
+the target's date stamp is always that file's modification date.
+The date stamp is computed when the target is needed in
+the execution of a rule; it is not a static value.
+.PP
+Nonexistent targets that have prerequisites
+and are themselves prerequisites are treated specially.
+Such a target
+.I t
+is given the date stamp of its most recent prerequisite
+and if this causes all the targets which have
+.I t
+as a prerequisite to be up to date,
+.I t
+is considered up to date.
+Otherwise,
+.I t
+is made in the normal fashion.
+The
+.B -i
+flag overrides this special treatment.
+.PP
+Files may be made in any order that respects
+the preceding restrictions.
+.PP
+A recipe is executed by supplying the recipe as standard input to
+the command
+.BR /bin/sh .
+(Note that unlike
+.IR make ,
+.I mk
+feeds the entire recipe to the shell rather than running each line
+of the recipe separately.)
+The environment is augmented by the following variables:
+.TP 14
+.B $alltarget
+all the targets of this rule.
+.TP
+.B $newprereq
+the prerequisites that caused this rule to execute.
+.TP
+.B $newmember
+the prerequisites that are members of an aggregate
+that caused this rule to execute.
+When the prerequisites of a rule are members of an
+aggregate,
+.B $newprereq
+contains the name of the aggregate and out of date
+members, while
+.B $newmember
+contains only the name of the members.
+.TP
+.B $nproc
+the process slot for this recipe.
+It satisfies
+.RB 0≤ $nproc < $NPROC .
+.TP
+.B $pid
+the process id for the
+.I mk
+executing the recipe.
+.TP
+.B $prereq
+all the prerequisites for this rule.
+.TP
+.B $stem
+if this is a meta-rule,
+.B $stem
+is the string that matched
+.B %
+or
+.BR & .
+Otherwise, it is empty.
+For regular expression meta-rules (see below), the variables
+.LR stem0 ", ...,"
+.L stem9
+are set to the corresponding subexpressions.
+.TP
+.B $target
+the targets for this rule that need to be remade.
+.PP
+These variables are available only during the execution of a recipe,
+not while evaluating the
+.IR mkfile .
+.PP
+Unless the rule has the
+.B Q
+attribute,
+the recipe is printed prior to execution
+with recognizable environment variables expanded.
+Commands returning error status
+cause
+.I mk
+to terminate.
+.PP
+Recipes and backquoted
+.B rc
+commands in places such as assignments
+execute in a copy of
+.I mk's
+environment; changes they make to
+environment variables are not visible from
+.IR mk .
+.PP
+Variable substitution in a rule is done when
+the rule is read; variable substitution in the recipe is done
+when the recipe is executed.  For example:
+.IP
+.EX
+bar=a.c
+foo:	$bar
+        $CC -o foo $bar
+bar=b.c
+.EE
+.PP
+will compile
+.B b.c
+into
+.BR foo ,
+if
+.B a.c
+is newer than
+.BR foo .
+.SS Aggregates
+Names of the form
+.IR a ( b )
+refer to member
+.I b
+of the aggregate
+.IR a .
+.SS Attributes
+The colon separating the target from the prerequisites
+may be
+immediately followed by
+.I attributes
+and another colon.
+The attributes are:
+.TP
+.B D
+If the recipe exits with a non-null status, the target is deleted.
+.TP
+.B E
+Continue execution if the recipe draws errors.
+.TP
+.B N
+If there is no recipe, the target has its time updated.
+.TP
+.B n
+The rule is a meta-rule that cannot be a target of a virtual rule.
+Only files match the pattern in the target.
+.TP
+.B P
+The characters after the
+.B P
+until the terminating
+.B :
+are taken as a program name.
+It will be invoked as
+.B "sh -c prog 'arg1' 'arg2'"
+and should return a zero exit status
+if and only if arg1 is up to date with respect to arg2.
+Date stamps are still propagated in the normal way.
+.TP
+.B Q
+The recipe is not printed prior to execution.
+.TP
+.B R
+The rule is a meta-rule using regular expressions.
+In the rule,
+.B %
+has no special meaning.
+The target is interpreted as a regular expression as defined in
+.IR regexp9 (7).
+The prerequisites may contain references
+to subexpressions in form
+.BI \e n\f1,
+as in the substitute command of
+.IR sed (1).
+.TP
+.B U
+The targets are considered to have been updated
+even if the recipe did not do so.
+.TP
+.B V
+The targets of this rule are marked as virtual.
+They are distinct from files of the same name.
+.PD
+.SH EXAMPLES
+A simple mkfile to compile a program:
+.IP
+.EX
+.ta 8n +8n +8n +8n +8n +8n +8n
+</$objtype/mkfile
+
+prog:	a.$O b.$O c.$O
+	$LD $LDFLAGS -o $target $prereq
+
+%.$O:	%.c
+	$CC $CFLAGS $stem.c
+.EE
+.PP
+Override flag settings in the mkfile:
+.IP
+.EX
+% mk target 'CFLAGS=-S -w'
+.EE
+.PP
+Maintain a library:
+.IP
+.EX
+libc.a(%.$O):N:	%.$O
+libc.a:	libc.a(abs.$O) libc.a(access.$O) libc.a(alarm.$O) ...
+	ar r libc.a $newmember
+.EE
+.PP
+String expression variables to derive names from a master list:
+.IP
+.EX
+NAMES=alloc arc bquote builtins expand main match mk var word
+OBJ=${NAMES:%=%.$O}
+.EE
+.PP
+Regular expression meta-rules:
+.IP
+.EX
+([^/]*)/(.*)\e.$O:R:  \e1/\e2.c
+	cd $stem1; $CC $CFLAGS $stem2.c
+.EE
+.PP
+A correct way to deal with
+.IR yacc (1)
+grammars.
+The file
+.B lex.c
+includes the file
+.B x.tab.h
+rather than
+.B y.tab.h
+in order to reflect changes in content, not just modification time.
+.IP
+.EX
+lex.$O:	x.tab.h
+x.tab.h:	y.tab.h
+	cmp -s x.tab.h y.tab.h || cp y.tab.h x.tab.h
+y.tab.c y.tab.h:	gram.y
+	$YACC -d gram.y
+.EE
+.PP
+The above example could also use the
+.B P
+attribute for the
+.B x.tab.h
+rule:
+.IP
+.EX
+x.tab.h:Pcmp -s:	y.tab.h
+	cp y.tab.h x.tab.h
+.EE
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR sh (1),
+.IR regexp9 (7)
+.PP
+A. Hume,
+``Mk: a Successor to Make''
+(Tenth Edition Research Unix Manuals).
+.PP
+Andrew G. Hume and Bob Flandrena,
+``Maintaining Files on Plan 9 with Mk''.
+.SH HISTORY
+Andrew Hume wrote
+.I mk
+for Tenth Edition Research Unix.
+It was later ported to Plan 9.
+This software is a port of the Plan 9 version back to Unix.
+.SH BUGS
+Identical recipes for regular expression meta-rules only have one target.
+.PP
+Seemingly appropriate input like
+.B CFLAGS=-DHZ=60
+is parsed as an erroneous attribute; correct it by inserting
+a space after the first 
+.LR = .
+.PP
+The recipes printed by
+.I mk
+before being passed to
+the shell
+for execution are sometimes erroneously expanded
+for printing.  Don't trust what's printed; rely
+on what the shell
+does.
--- a/unix/man/mkfile
+++ b/unix/man/mkfile
@ -0,0 +1,48 @@
+MAN=\
+	isalpharune.3\
+	rune.3\
+	runestrcat.3\
+	utf.7\
+	print.3\
+	fmtinstall.3\
+	quote.3\
+	fmtstrtod.3\
+	bio.3\
+	regexp9.3\
+	regexp9.7\
+	mk.1\
+
+HTML=\
+	isalpharune3.html\
+	rune3.html\
+	runestrcat3.html\
+	utf7.html\
+	print3.html\
+	fmtinstall3.html\
+	quote3.html\
+	fmtstrtod3.html\
+	bio3.html\
+	regexp93.html\
+	regexp97.html\
+	mk1.html\
+
+all:V: $MAN $HTML
+
+title='Ported from Plan 9'
+MKSHELL=$PLAN9/bin/rc
+
+%1.html:D: %.1
+	whatis title
+	9 troff -manhtml $prereq | troff2html -t $title > $target
+	./fixurls $target
+
+%3.html:D: %.3
+	9 troff -manhtml $prereq | troff2html -t $title > $target
+	./fixurls $target
+
+%7.html:D: %.7
+	9 troff -manhtml $prereq | troff2html -t $title > $target
+	./fixurls $target
+
+push:V:
+	rsync -e ssh *.html swtch:www/swtch.com/plan9port/unix/man
--- a/unix/man/print.3
+++ b/unix/man/print.3
@ -0,0 +1,474 @@
+.\" diffs from /usr/local/plan9/man/man3/print.3:
+.\"
+.\" - include different headers
+.\" - drop reference to bio(3)
+.\" - change exits to exit
+.\" - text about unsigned verbs
+.\" - source pointer
+.\"
+.TH PRINT 3
+.SH NAME
+print, fprint, sprint, snprint, seprint, smprint, runesprint, runesnprint, runeseprint, runesmprint, vfprint, vsnprint, vseprint, vsmprint, runevsnprint, runevseprint, runevsmprint \- print formatted output
+.SH SYNOPSIS
+.B #include <utf.h>
+.PP
+.B #include <fmt.h>
+.PP
+.ta \w'\fLchar* 'u
+.B
+int	print(char *format, ...)
+.PP
+.B
+int	fprint(int fd, char *format, ...)
+.PP
+.B
+int	sprint(char *s, char *format, ...)
+.PP
+.B
+int	snprint(char *s, int len, char *format, ...)
+.PP
+.B
+char*	seprint(char *s, char *e, char *format, ...)
+.PP
+.B
+char*	smprint(char *format, ...)
+.PP
+.B
+int	runesprint(Rune *s, char *format, ...)
+.PP
+.B
+int	runesnprint(Rune *s, int len, char *format, ...)
+.PP
+.B
+Rune*	runeseprint(Rune *s, Rune *e, char *format, ...)
+.PP
+.B
+Rune*	runesmprint(char *format, ...)
+.PP
+.B
+int	vfprint(int fd, char *format, va_list v)
+.PP
+.B
+int	vsnprint(char *s, int len, char *format, va_list v)
+.PP
+.B
+char*	vseprint(char *s, char *e, char *format, va_list v)
+.PP
+.B
+char*	vsmprint(char *format, va_list v)
+.PP
+.B
+int	runevsnprint(Rune *s, int len, char *format, va_list v)
+.PP
+.B
+Rune*	runevseprint(Rune *s, Rune *e, char *format, va_list v)
+.PP
+.B
+Rune*	runevsmprint(char *format, va_list v)
+.PP
+.B
+.SH DESCRIPTION
+.I Print
+writes text to the standard output.
+.I Fprint
+writes to the named output
+file descriptor:
+a buffered form
+is described in
+.IR bio (3).
+.I Sprint
+places text
+followed by the NUL character
+.RB ( \e0 )
+in consecutive bytes starting at
+.IR s ;
+it is the user's responsibility to ensure that
+enough storage is available.
+Each function returns the number of bytes
+transmitted (not including the NUL
+in the case of
+.IR sprint ),
+or
+a negative value if an output error was encountered.
+.PP
+.I Snprint
+is like
+.IR sprint ,
+but will not place more than
+.I len
+bytes in
+.IR s .
+Its result is always NUL-terminated and holds the maximal
+number of complete UTF-8 characters that can fit.
+.I Seprint
+is like
+.IR snprint ,
+except that the end is indicated by a pointer
+.I e
+rather than a count and the return value points to the terminating NUL of the
+resulting string.
+.I Smprint
+is like
+.IR sprint ,
+except that it prints into and returns a string of the required length, which is
+allocated by
+.IR malloc (3).
+.PP
+The routines
+.IR runesprint ,
+.IR runesnprint ,
+.IR runeseprint ,
+and
+.I runesmprint
+are the same as
+.IR sprint ,
+.IR snprint ,
+.IR seprint ,
+and
+.I smprint
+except that their output is rune strings instead of byte strings.
+.PP
+Finally, the routines
+.IR vfprint ,
+.IR vsnprint ,
+.IR vseprint ,
+.IR vsmprint ,
+.IR runevsnprint ,
+.IR runevseprint ,
+and
+.I runevsmprint
+are like their
+.BR v-less
+relatives except they take as arguments a
+.B va_list
+parameter, so they can be called within a variadic function.
+The Example section shows a representative usage.
+.PP
+Each of these functions
+converts, formats, and prints its
+trailing arguments
+under control of a
+.IR format 
+string.
+The
+format
+contains two types of objects:
+plain characters, which are simply copied to the
+output stream,
+and conversion specifications,
+each of which results in fetching of
+zero or more
+arguments.
+The results are undefined if there are arguments of the
+wrong type or too few
+arguments for the format.
+If the format is exhausted while
+arguments remain, the excess
+is ignored.
+.PP
+Each conversion specification has the following format:
+.IP
+.B "% [flags] verb"
+.PP
+The verb is a single character and each flag is a single character or a
+(decimal) numeric string.
+Up to two numeric strings may be used;
+the first is called
+.IR width ,
+the second
+.IR precision .
+A period can be used to separate them, and if the period is
+present then
+.I width
+and
+.I precision
+are taken to be zero if missing, otherwise they are `omitted'.
+Either or both of the numbers may be replaced with the character
+.BR * ,
+meaning that the actual number will be obtained from the argument list
+as an integer.
+The flags and numbers are arguments to
+the
+.I verb
+described below.
+.PP
+The numeric verbs
+.BR d ,
+.BR i ,
+.BR u ,
+.BR o ,
+.BR b ,
+.BR x ,
+and
+.B X
+format their arguments in decimal, decimal,
+unsigned decimal, octal, binary, hexadecimal, and upper case hexadecimal.
+Each interprets the flags
+.BR 0 ,
+.BR h ,
+.BR hh ,
+.BR l ,
+.BR + ,
+.BR - ,
+.BR , ,
+and
+.B #
+to mean pad with zeros,
+short, byte, long, always print a sign, left justified, commas every three digits,
+and alternate format.
+Also, a space character in the flag
+position is like
+.BR + ,
+but prints a space instead of a plus sign for non-negative values.
+If neither
+short nor long is specified,
+then the argument is an
+.BR int .
+If an unsigned verb is specified,
+then the argument is interpreted as a
+positive number and no sign is output;
+space and
+.B +
+flags are ignored for unsigned verbs.
+If two
+.B l
+flags are given,
+then the argument is interpreted as a
+.B vlong
+(usually an 8-byte, sometimes a 4-byte integer).
+If
+.I precision
+is not omitted, the number is padded on the left with zeros
+until at least
+.I precision
+digits appear.
+If
+.I precision
+is explicitly 0, and the number is 0,
+no digits are generated, and alternate formatting
+does not apply.
+Then, if alternate format is specified,
+for
+.B o
+conversion, the number is preceded by a
+.B 0
+if it doesn't already begin with one.
+For non-zero numbers and
+.B x
+conversion, the number is preceded by
+.BR 0x ;
+for
+.B X
+conversion, the number is preceded by
+.BR 0X .
+Finally, if
+.I width
+is not omitted, the number is padded on the left (or right, if
+left justification is specified) with enough blanks to
+make the field at least
+.I width
+characters long.
+.PP
+The floating point verbs
+.BR f ,
+.BR e ,
+.BR E ,
+.BR g ,
+and
+.B G
+take a
+.B double
+argument.
+Each interprets the flags
+.BR 0 ,
+.BR L ,
+.BR + ,
+.BR - ,
+and
+.B #
+to mean pad with zeros,
+long double argument,
+always print a sign,
+left justified,
+and
+alternate format.
+.I Width
+is the minimum field width and,
+if the converted value takes up less than
+.I width
+characters, it is padded on the left (or right, if `left justified')
+with spaces.
+.I Precision
+is the number of digits that are converted after the decimal place for
+.BR e ,
+.BR E ,
+and
+.B f
+conversions,
+and
+.I precision
+is the maximum number of significant digits for
+.B g
+and
+.B G
+conversions.
+The 
+.B f
+verb produces output of the form
+.RB [ - ] digits [ .digits\fR].
+.B E
+conversion appends an exponent
+.BR E [ - ] digits ,
+and
+.B e
+conversion appends an exponent
+.BR e [ - ] digits .
+The
+.B g
+verb will output the argument in either
+.B e
+or
+.B f
+format, with the goal of producing the shortest output.
+Also, trailing zeros are omitted from the fraction part of
+the output, and a trailing decimal point appears only if it is followed
+by a digit.
+The
+.B G
+verb is similar, but uses
+.B E
+format instead of
+.BR e .
+When alternate format is specified, the result will always contain a decimal point,
+and for
+.B g
+and
+.B G
+conversions, trailing zeros are not removed.
+.PP
+The
+.B s
+verb copies a string
+(pointer to
+.BR char )
+to the output.
+The number of characters copied
+.RI ( n )
+is the minimum
+of the size of the string and
+.IR precision .
+These
+.I n
+characters are justified within a field of
+.I width
+characters as described above.
+If a
+.I precision
+is given, it is safe for the string not to be NUL-terminated
+as long as it is at least
+.I precision
+characters (not bytes!) long.
+The
+.B S
+verb is similar, but it interprets its pointer as an array
+of runes (see
+.IR utf (7));
+the runes are converted to
+.SM UTF
+before output.
+.PP
+The
+.B c
+verb copies a single
+.B char
+(promoted to
+.BR int )
+justified within a field of
+.I width
+characters as described above.
+The
+.B C
+verb is similar, but works on runes.
+.PP
+The
+.B p
+verb formats a pointer value.
+At the moment, it is a synonym for
+.BR x ,
+but that will change if pointers and integers are different sizes.
+.PP
+The
+.B r
+verb takes no arguments; it copies the error string returned by a call to
+.IR strerror (3)
+with an argument of
+.IR errno .
+.PP
+Custom verbs may be installed using
+.IR fmtinstall (3).
+.SH EXAMPLE
+This function prints an error message with a variable
+number of arguments and then quits.
+.IP
+.EX
+.ta 6n +6n +6n
+void fatal(char *msg, ...)
+{
+	char buf[1024], *out;
+	va_list arg;
+
+	out = seprint(buf, buf+sizeof buf, "Fatal error: ");
+	va_start(arg, msg);
+	out = vseprint(out, buf+sizeof buf, msg, arg);
+	va_end(arg);
+	write(2, buf, out-buf);
+	exit(1);
+}
+.EE
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR fmtinstall (3),
+.IR fprintf (3),
+.IR utf (7)
+.SH DIAGNOSTICS
+Routines that write to a file descriptor or call
+.IR malloc
+set
+.IR errstr .
+.SH BUGS
+The formatting is close to that specified for ANSI
+.IR fprintf (3);
+the main difference is that
+.B b
+and
+.B r
+are not in ANSI and some
+.B C9X
+verbs and syntax are missing.
+Also, and distinctly not a bug,
+.I print
+and friends generate
+.SM UTF
+rather than
+.SM ASCII.
+.PP
+There is no
+.IR runeprint ,
+.IR runefprint ,
+etc. because runes are byte-order dependent and should not be written directly to a file; use the
+UTF output of
+.I print
+or
+.I fprint
+instead.
+Also,
+.I sprint
+is deprecated for safety reasons; use
+.IR snprint ,
+.IR seprint ,
+or
+.I smprint
+instead.
+Safety also precludes the existence of
+.IR runesprint .
--- a/unix/man/quote.3
+++ b/unix/man/quote.3
@ -0,0 +1,151 @@
+.TH QUOTE 3
+.SH NAME
+quotestrdup, quoterunestrdup, unquotestrdup, unquoterunestrdup, quotestrfmt, quoterunestrfmt, quotefmtinstall, fmtdoquote \- quoted character strings
+.SH SYNOPSIS
+.B #include <utf.h>
+.br
+.B #include <fmt.h>
+.PP
+.B
+char *quotestrdup(char *s)
+.PP
+.B
+Rune *quoterunestrdup(Rune *s)
+.PP
+.B
+char *unquotestrdup(char *s)
+.PP
+.B
+Rune *unquoterunestrdup(Rune *s)
+.PP
+.B
+int quotestrfmt(Fmt*)
+.PP
+.B
+int quoterunestrfmt(Fmt*)
+.PP
+.B
+void quotefmtinstall(void)
+.PP
+.B
+int (*fmtdoquote)(int c)
+.PP
+.SH DESCRIPTION
+These routines manipulate character strings, either adding or removing
+quotes as necessary.
+In the quoted form, the strings are in the style of
+.IR rc (1),
+with single quotes surrounding the string.
+Embedded single quotes are indicated by a doubled single quote.
+For instance,
+.IP
+.EX
+Don't worry!
+.EE
+.PP
+when quoted becomes
+.IP
+.EX
+\&'Don''t worry!'
+.EE
+.PP
+The empty string is represented by two quotes,
+.BR '' .
+.PP
+The first four functions act as variants of
+.B strdup
+(see
+.IR strcat (3)).
+Each returns a
+freshly allocated copy of the string, created using
+.IR malloc (3).
+.I Quotestrdup
+returns a quoted copy of
+.IR s ,
+while
+.I unquotestrdup
+returns a copy of
+.IR s
+with the quotes evaluated.
+The
+.I rune
+versions of these functions do the same for
+.CW Rune
+strings (see
+.IR runestrcat (3)).
+.PP
+The string returned by
+.I quotestrdup
+or
+.I quoterunestrdup
+has the following properties:
+.TP
+1.
+If the original string
+.IR s
+is empty, the returned string is
+.BR '' .
+.TP
+2.
+If
+.I s
+contains no quotes, blanks, or control characters,
+the returned string is identical to
+.IR s .
+.TP
+3.
+If
+.I s
+needs quotes to be added, the first character of the returned
+string will be a quote.
+For example,
+.B hello\ world
+becomes
+.B \&'hello\ world'
+not
+.BR hello'\ 'world .
+.PP
+The function pointer
+.I fmtdoquote
+is
+.B nil
+by default.
+If it is non-nil, characters are passed to that function to see if they should
+be quoted.
+This mechanism allows programs to specify that
+characters other than blanks, control characters, or quotes be quoted.
+Regardless of the return value of
+.IR *fmtdoquote ,
+blanks, control characters, and quotes are always quoted.
+.I Needsrcquote
+is provided as a
+.I fmtdoquote
+function that flags any character special to
+.IR rc (1).
+.PP
+.I Quotestrfmt
+and
+.I quoterunestrfmt
+are
+.IR print (3)
+formatting routines that produce quoted strings as output.
+They may be installed by hand, but
+.I quotefmtinstall
+installs them under the standard format characters
+.B q
+and
+.BR Q .
+(They are not installed automatically.)
+If the format string includes the alternate format character
+.BR # ,
+for example
+.BR %#q ,
+the printed string will always be quoted; otherwise quotes will only be provided if necessary
+to avoid ambiguity.
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH "SEE ALSO"
+.IR rc (1),
+.IR malloc (3),
+.IR print (3),
+.IR strcat (3)
--- a/unix/man/regexp9.3
+++ b/unix/man/regexp9.3
@ -0,0 +1,212 @@
+.TH REGEXP9 3
+.SH NAME
+regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression
+.SH SYNOPSIS
+.B #include <utf.h>
+.br
+.B #include <fmt.h>
+.br
+.B #include <regexp9.h>
+.PP
+.ta \w'\fLRegprog 'u
+.B
+Reprog	*regcomp(char *exp)
+.PP
+.B
+Reprog	*regcomplit(char *exp)
+.PP
+.B
+Reprog	*regcompnl(char *exp)
+.PP
+.nf
+.B
+int  regexec(Reprog *prog, char *string, Resub *match, int msize)
+.PP
+.nf
+.B
+void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
+.PP
+.nf
+.B
+int  rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
+.PP
+.nf
+.B
+void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
+.PP
+.B
+void regerror(char *msg)
+.SH DESCRIPTION
+.I Regcomp
+compiles a
+regular expression and returns
+a pointer to the generated description.
+The space is allocated by
+.IR malloc (3)
+and may be released by
+.IR free .
+Regular expressions are exactly as in
+.IR regexp9 (7).
+.PP
+.I Regcomplit
+is like
+.I regcomp
+except that all characters are treated literally.
+.I Regcompnl
+is like
+.I regcomp
+except that the
+.B .
+metacharacter matches all characters, including newlines.
+.PP
+.I Regexec
+matches a null-terminated
+.I string
+against the compiled regular expression in
+.IR prog .
+If it matches,
+.I regexec
+returns
+.B 1
+and fills in the array
+.I match
+with character pointers to the substrings of
+.I string
+that correspond to the
+parenthesized subexpressions of 
+.IR exp :
+.BI match[ i ].sp
+points to the beginning and
+.BI match[ i ].ep
+points just beyond
+the end of the
+.IR i th
+substring.
+(Subexpression
+.I i
+begins at the
+.IR i th
+left parenthesis, counting from 1.)
+Pointers in
+.B match[0]
+pick out the substring that corresponds to
+the whole regular expression.
+Unused elements of
+.I match
+are filled with zeros.
+Matches involving
+.LR * ,
+.LR + ,
+and 
+.L ?
+are extended as far as possible.
+The number of array elements in 
+.I match
+is given by
+.IR msize .
+The structure of elements of
+.I match 
+is:
+.IP
+.EX
+typedef struct {
+	union {
+	   char *sp;
+	   Rune *rsp;
+	};
+	union {
+	   char *ep;
+	   Rune *rep;
+	};
+} Resub;
+.EE
+.LP
+If
+.B match[0].sp
+is nonzero on entry,
+.I regexec
+starts matching at that point within
+.IR string .
+If
+.B match[0].ep
+is nonzero on entry,
+the last character matched is the one
+preceding that point.
+.PP
+.I Regsub
+places in
+.I dest
+a substitution instance of
+.I source
+in the context of the last
+.I regexec
+performed using
+.IR match .
+Each instance of
+.BI \e n\f1,
+where
+.I n
+is a digit, is replaced by the
+string delimited by
+.BI match[ n ].sp
+and
+.BI match[ n ].ep\f1.
+Each instance of 
+.L &
+is replaced by the string delimited by
+.B match[0].sp
+and
+.BR match[0].ep .
+The substitution will always be null terminated and
+trimmed to fit into dlen bytes.
+.PP
+.IR Regerror ,
+called whenever an error is detected in
+.IR regcomp ,
+writes the string
+.I msg
+on the standard error file and exits.
+.I Regerror
+can be replaced to perform
+special error processing.
+If the user supplied
+.I regerror
+returns rather than exits,
+.I regcomp
+will return 0. 
+.PP
+.I Rregexec
+and
+.I rregsub
+are variants of 
+.I regexec
+and
+.I regsub
+that use strings of
+.B Runes
+instead of strings of
+.BR chars .
+With these routines, the 
+.I rsp
+and
+.I rep
+fields of the
+.I match
+array elements should be used.
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH "SEE ALSO"
+.IR grep (1)
+.SH DIAGNOSTICS
+.I Regcomp
+returns 
+.B 0
+for an illegal expression
+or other failure.
+.I Regexec
+returns 0
+if
+.I string
+is not matched.
+.SH BUGS
+There is no way to specify or match a NUL character; NULs terminate patterns and strings.
--- a/unix/man/regexp9.7
+++ b/unix/man/regexp9.7
@ -0,0 +1,133 @@
+.TH REGEXP9 7
+.SH NAME
+regexp9 \- Plan 9 regular expression notation
+.SH DESCRIPTION
+This manual page describes the regular expression
+syntax used by the Plan 9 regular expression library
+.IR regexp9 (3).
+It is the form used by
+.IR egrep (1)
+before
+.I egrep
+got complicated.
+.PP
+A 
+.I "regular expression"
+specifies
+a set of strings of characters.
+A member of this set of strings is said to be
+.I matched
+by the regular expression.  In many applications
+a delimiter character, commonly
+.LR / ,
+bounds a regular expression.
+In the following specification for regular expressions
+the word `character' means any character (rune) but newline.
+.PP
+The syntax for a regular expression
+.B e0
+is
+.IP
+.EX
+e3:  literal | charclass | '.' | '^' | '$' | '(' e0 ')'
+
+e2:  e3
+  |  e2 REP
+
+REP: '*' | '+' | '?'
+
+e1:  e2
+  |  e1 e2
+
+e0:  e1
+  |  e0 '|' e1
+.EE
+.PP
+A
+.B literal
+is any non-metacharacter, or a metacharacter
+(one of
+.BR .*+?[]()|\e^$ ),
+or the delimiter
+preceded by 
+.LR \e .
+.PP
+A
+.B charclass
+is a nonempty string
+.I s
+bracketed
+.BI [ \|s\| ]
+(or
+.BI [^ s\| ]\fR);
+it matches any character in (or not in)
+.IR s .
+A negated character class never
+matches newline.
+A substring 
+.IB a - b\f1,
+with
+.I a
+and
+.I b
+in ascending
+order, stands for the inclusive
+range of
+characters between
+.I a
+and
+.IR b .
+In 
+.IR s ,
+the metacharacters
+.LR - ,
+.LR ] ,
+an initial
+.LR ^ ,
+and the regular expression delimiter
+must be preceded by a
+.LR \e ;
+other metacharacters 
+have no special meaning and
+may appear unescaped.
+.PP
+A 
+.L .
+matches any character.
+.PP
+A
+.L ^
+matches the beginning of a line;
+.L $
+matches the end of the line.
+.PP
+The 
+.B REP
+operators match zero or more
+.RB ( * ),
+one or more
+.RB ( + ),
+zero or one
+.RB ( ? ),
+instances respectively of the preceding regular expression 
+.BR e2 .
+.PP
+A concatenated regular expression,
+.BR "e1\|e2" ,
+matches a match to 
+.B e1
+followed by a match to
+.BR e2 .
+.PP
+An alternative regular expression,
+.BR "e0\||\|e1" ,
+matches either a match to
+.B e0
+or a match to
+.BR e1 .
+.PP
+A match to any part of a regular expression
+extends as far as possible without preventing
+a match to the remainder of the regular expression.
+.SH "SEE ALSO"
+.IR regexp9 (3)
--- a/unix/man/rune.3
+++ b/unix/man/rune.3
@ -0,0 +1,186 @@
+.TH RUNE 3
+.SH NAME
+runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
+.SH SYNOPSIS
+.ta \w'\fLchar*xx'u
+.B #include <utf.h>
+.PP
+.B
+int	runetochar(char *s, Rune *r)
+.PP
+.B
+int	chartorune(Rune *r, char *s)
+.PP
+.B
+int	runelen(long r)
+.PP
+.B
+int	runenlen(Rune *r, int n)
+.PP
+.B
+int	fullrune(char *s, int n)
+.PP
+.B
+char*	utfecpy(char *s1, char *es1, char *s2)
+.PP
+.B
+int	utflen(char *s)
+.PP
+.B
+int	utfnlen(char *s, long n)
+.PP
+.B
+char*	utfrune(char *s, long c)
+.PP
+.B
+char*	utfrrune(char *s, long c)
+.PP
+.B
+char*	utfutf(char *s1, char *s2)
+.SH DESCRIPTION
+These routines convert to and from a
+.SM UTF
+byte stream and runes.
+.PP
+.I Runetochar
+copies one rune at
+.I r
+to at most
+.B UTFmax
+bytes starting at
+.I s
+and returns the number of bytes copied.
+.BR UTFmax ,
+defined as
+.B 3
+in
+.BR <utf.h> ,
+is the maximum number of bytes required to represent a rune.
+.PP
+.I Chartorune
+copies at most
+.B UTFmax
+bytes starting at
+.I s
+to one rune at
+.I r
+and returns the number of bytes copied.
+If the input is not exactly in
+.SM UTF
+format,
+.I chartorune
+will convert to 0x80 and return 1.
+.PP
+.I Runelen
+returns the number of bytes
+required to convert
+.I r
+into
+.SM UTF.
+.PP
+.I Runenlen
+returns the number of bytes
+required to convert the
+.I n
+runes pointed to by
+.I r
+into
+.SM UTF.
+.PP
+.I Fullrune
+returns 1 if the string
+.I s
+of length
+.I n
+is long enough to be decoded by
+.I chartorune
+and 0 otherwise.
+This does not guarantee that the string
+contains a legal
+.SM UTF
+encoding.
+This routine is used by programs that
+obtain input a byte at
+a time and need to know when a full rune
+has arrived.
+.PP
+The following routines are analogous to the
+corresponding string routines with
+.B utf
+substituted for
+.B str
+and
+.B rune
+substituted for
+.BR chr .
+.PP
+.I Utfecpy
+copies UTF sequences until a null sequence has been copied, but writes no 
+sequences beyond
+.IR es1 .
+If any sequences are copied,
+.I s1
+is terminated by a null sequence, and a pointer to that sequence is returned.
+Otherwise, the original
+.I s1
+is returned.
+.PP
+.I Utflen
+returns the number of runes that
+are represented by the
+.SM UTF
+string
+.IR s .
+.PP
+.I Utfnlen
+returns the number of complete runes that
+are represented by the first
+.I n
+bytes of
+.SM UTF
+string
+.IR s .
+If the last few bytes of the string contain an incompletely coded rune,
+.I utfnlen
+will not count them; in this way, it differs from
+.IR utflen ,
+which includes every byte of the string.
+.PP
+.I Utfrune
+.RI ( utfrrune )
+returns a pointer to the first (last)
+occurrence of rune
+.I c
+in the
+.SM UTF
+string
+.IR s ,
+or 0 if
+.I c
+does not occur in the string.
+The NUL byte terminating a string is considered to
+be part of the string
+.IR s .
+.PP
+.I Utfutf
+returns a pointer to the first occurrence of
+the
+.SM UTF
+string
+.I s2
+as a
+.SM UTF
+substring of
+.IR s1 ,
+or 0 if there is none.
+If
+.I s2
+is the null string,
+.I utfutf
+returns
+.IR s1 .
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR utf (7),
+.IR tcs (1)
--- a/unix/man/runestrcat.3
+++ b/unix/man/runestrcat.3
@ -0,0 +1,66 @@
+.TH RUNESTRCAT 3
+.SH NAME
+runestrcat, 
+runestrncat,
+runestrcmp,
+runestrncmp,
+runestrcpy,
+runestrncpy,
+runestrecpy,
+runestrlen,
+runestrchr,
+runestrrchr,
+runestrdup,
+runestrstr \- rune string operations
+.SH SYNOPSIS
+.B #include <utf.h>
+.PP
+.ta \w'\fLRune* \fP'u
+.B
+Rune*	runestrcat(Rune *s1, Rune *s2)
+.PP
+.B
+Rune*	runestrncat(Rune *s1, Rune *s2, long n)
+.PP
+.B
+int	runestrcmp(Rune *s1, Rune *s2)
+.PP
+.B
+int	runestrncmp(Rune *s1, Rune *s2, long n)
+.PP
+.B
+Rune*	runestrcpy(Rune *s1, Rune *s2)
+.PP
+.B
+Rune*	runestrncpy(Rune *s1, Rune *s2, long n)
+.PP
+.B
+Rune*	runestrecpy(Rune *s1, Rune *es1, Rune *s2)
+.PP
+.B
+long	runestrlen(Rune *s)
+.PP
+.B
+Rune*	runestrchr(Rune *s, Rune c)
+.PP
+.B
+Rune*	runestrrchr(Rune *s, Rune c)
+.PP
+.B
+Rune*	runestrdup(Rune *s)
+.PP
+.B
+Rune*	runestrstr(Rune *s1, Rune *s2)
+.SH DESCRIPTION
+These functions are rune string analogues of
+the corresponding functions in 
+.IR strcat (3).
+.SH SOURCE
+.B http://swtch.com/plan9port/unix
+.SH SEE ALSO
+.IR rune (3),
+.IR strcat (3)
+.SH BUGS
+The outcome of overlapping moves varies among implementations.
--- a/unix/man/utf.7
+++ b/unix/man/utf.7
@ -0,0 +1,91 @@
+.TH UTF 7
+.SH NAME
+UTF, Unicode, ASCII, rune \- character set and format
+.SH DESCRIPTION
+The Plan 9 character set and representation are
+based on the Unicode Standard and on the ISO multibyte
+.SM UTF-8
+encoding (Universal Character
+Set Transformation Format, 8 bits wide).
+The Unicode Standard represents its characters in 16
+bits;
+.SM UTF-8
+represents such
+values in an 8-bit byte stream.
+Throughout this manual,
+.SM UTF-8
+is shortened to
+.SM UTF.
+.PP
+In Plan 9, a
+.I rune
+is a 16-bit quantity representing a Unicode character.
+Internally, programs may store characters as runes.
+However, any external manifestation of textual information,
+in files or at the interface between programs, uses a
+machine-independent, byte-stream encoding called
+.SM UTF.
+.PP
+.SM UTF
+is designed so that the characters of the 7-bit
+.SM ASCII
+set (values hexadecimal 00 to 7F)
+appear only as themselves
+in the encoding.
+Runes with values above 7F appear as sequences of two or more
+bytes with values only from 80 to FF.
+.PP
+The
+.SM UTF
+encoding of the Unicode Standard is backward compatible with
+.SM ASCII\c
+:
+programs presented only with
+.SM ASCII
+work on Plan 9
+even if not written to deal with
+.SM UTF,
+as do
+programs that deal with uninterpreted byte streams.
+However, programs that perform semantic processing on
+.SM ASCII
+graphic
+characters must convert from
+.SM UTF
+to runes
+in order to work properly with non-\c
+.SM ASCII
+input.
+See
+.IR rune (3).
+.PP
+Letting numbers be binary,
+a rune x is converted to a multibyte
+.SM UTF
+sequence
+as follows:
+.PP
+01.   x in [00000000.0bbbbbbb] → 0bbbbbbb
+.br
+10.   x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb
+.br
+11.   x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb
+.br
+.PP
+Conversion 01 provides a one-byte sequence that spans the
+.SM ASCII
+character set in a compatible way.
+Conversions 10 and 11 represent higher-valued characters
+as sequences of two or three bytes with the high bit set.
+Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open.
+When there are multiple ways to encode a value, for example rune 0,
+the shortest encoding is used.
+.PP
+In the inverse mapping,
+any sequence except those described above
+is incorrect and is converted to rune hexadecimal 0080.
+.SH "SEE ALSO"
+.IR ascii (1),
+.IR tcs (1),
+.IR rune (3),
+.IR "The Unicode Standard" .
				`@ -0,0 +1 @@`
				`bio3.html fmtinstall3.html fmtstrtod3.html index.html isalpharune3.html mk1.html print3.html quote3.html regexp93.html regexp97.html rune3.html runestrcat3.html utf7.html`