Initial revision
This commit is contained in:
parent
5f7d5e8d18
commit
b2cfc4e2e7
242 changed files with 18177 additions and 0 deletions
258
src/libregexp/LICENSE
Normal file
258
src/libregexp/LICENSE
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
The Plan 9 software is provided under the terms of the
|
||||
Lucent Public License, Version 1.02, reproduced below,
|
||||
with the following exceptions:
|
||||
|
||||
1. No right is granted to create derivative works of or
|
||||
to redistribute (other than with the Plan 9 Operating System)
|
||||
the screen imprinter fonts identified in subdirectory
|
||||
/lib/font/bit/lucida and printer fonts (Lucida Sans Unicode, Lucida
|
||||
Sans Italic, Lucida Sans Demibold, Lucida Typewriter, Lucida Sans
|
||||
Typewriter83), identified in subdirectory /sys/lib/postscript/font.
|
||||
These directories contain material copyrights by B&H Inc. and Y&Y Inc.
|
||||
|
||||
2. The printer fonts identified in subdirectory /sys/lib/ghostscript/font
|
||||
are subject to the GNU GPL, reproduced in the file /LICENSE.gpl.
|
||||
|
||||
3. The ghostscript program in the subdirectory /sys/src/cmd/gs is
|
||||
covered by the Aladdin Free Public License, reproduced in the file
|
||||
/LICENSE.afpl.
|
||||
|
||||
===================================================================
|
||||
|
||||
Lucent Public License Version 1.02
|
||||
|
||||
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS PUBLIC
|
||||
LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE
|
||||
PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
|
||||
|
||||
1. DEFINITIONS
|
||||
|
||||
"Contribution" means:
|
||||
|
||||
a. in the case of Lucent Technologies Inc. ("LUCENT"), the Original
|
||||
Program, and
|
||||
b. in the case of each Contributor,
|
||||
|
||||
i. changes to the Program, and
|
||||
ii. additions to the Program;
|
||||
|
||||
where such changes and/or additions to the Program were added to the
|
||||
Program by such Contributor itself or anyone acting on such
|
||||
Contributor's behalf, and the Contributor explicitly consents, in
|
||||
accordance with Section 3C, to characterization of the changes and/or
|
||||
additions as Contributions.
|
||||
|
||||
"Contributor" means LUCENT and any other entity that has Contributed a
|
||||
Contribution to the Program.
|
||||
|
||||
"Distributor" means a Recipient that distributes the Program,
|
||||
modifications to the Program, or any part thereof.
|
||||
|
||||
"Licensed Patents" mean patent claims licensable by a Contributor
|
||||
which are necessarily infringed by the use or sale of its Contribution
|
||||
alone or when combined with the Program.
|
||||
|
||||
"Original Program" means the original version of the software
|
||||
accompanying this Agreement as released by LUCENT, including source
|
||||
code, object code and documentation, if any.
|
||||
|
||||
"Program" means the Original Program and Contributions or any part
|
||||
thereof
|
||||
|
||||
"Recipient" means anyone who receives the Program under this
|
||||
Agreement, including all Contributors.
|
||||
|
||||
2. GRANT OF RIGHTS
|
||||
|
||||
a. Subject to the terms of this Agreement, each Contributor hereby
|
||||
grants Recipient a non-exclusive, worldwide, royalty-free copyright
|
||||
license to reproduce, prepare derivative works of, publicly display,
|
||||
publicly perform, distribute and sublicense the Contribution of such
|
||||
Contributor, if any, and such derivative works, in source code and
|
||||
object code form.
|
||||
|
||||
b. Subject to the terms of this Agreement, each Contributor hereby
|
||||
grants Recipient a non-exclusive, worldwide, royalty-free patent
|
||||
license under Licensed Patents to make, use, sell, offer to sell,
|
||||
import and otherwise transfer the Contribution of such Contributor, if
|
||||
any, in source code and object code form. The patent license granted
|
||||
by a Contributor shall also apply to the combination of the
|
||||
Contribution of that Contributor and the Program if, at the time the
|
||||
Contribution is added by the Contributor, such addition of the
|
||||
Contribution causes such combination to be covered by the Licensed
|
||||
Patents. The patent license granted by a Contributor shall not apply
|
||||
to (i) any other combinations which include the Contribution, nor to
|
||||
(ii) Contributions of other Contributors. No hardware per se is
|
||||
licensed hereunder.
|
||||
|
||||
c. Recipient understands that although each Contributor grants the
|
||||
licenses to its Contributions set forth herein, no assurances are
|
||||
provided by any Contributor that the Program does not infringe the
|
||||
patent or other intellectual property rights of any other entity. Each
|
||||
Contributor disclaims any liability to Recipient for claims brought by
|
||||
any other entity based on infringement of intellectual property rights
|
||||
or otherwise. As a condition to exercising the rights and licenses
|
||||
granted hereunder, each Recipient hereby assumes sole responsibility
|
||||
to secure any other intellectual property rights needed, if any. For
|
||||
example, if a third party patent license is required to allow
|
||||
Recipient to distribute the Program, it is Recipient's responsibility
|
||||
to acquire that license before distributing the Program.
|
||||
|
||||
d. Each Contributor represents that to its knowledge it has sufficient
|
||||
copyright rights in its Contribution, if any, to grant the copyright
|
||||
license set forth in this Agreement.
|
||||
|
||||
3. REQUIREMENTS
|
||||
|
||||
A. Distributor may choose to distribute the Program in any form under
|
||||
this Agreement or under its own license agreement, provided that:
|
||||
|
||||
a. it complies with the terms and conditions of this Agreement;
|
||||
|
||||
b. if the Program is distributed in source code or other tangible
|
||||
form, a copy of this Agreement or Distributor's own license agreement
|
||||
is included with each copy of the Program; and
|
||||
|
||||
c. if distributed under Distributor's own license agreement, such
|
||||
license agreement:
|
||||
|
||||
i. effectively disclaims on behalf of all Contributors all warranties
|
||||
and conditions, express and implied, including warranties or
|
||||
conditions of title and non-infringement, and implied warranties or
|
||||
conditions of merchantability and fitness for a particular purpose;
|
||||
ii. effectively excludes on behalf of all Contributors all liability
|
||||
for damages, including direct, indirect, special, incidental and
|
||||
consequential damages, such as lost profits; and
|
||||
iii. states that any provisions which differ from this Agreement are
|
||||
offered by that Contributor alone and not by any other party.
|
||||
|
||||
B. Each Distributor must include the following in a conspicuous
|
||||
location in the Program:
|
||||
|
||||
Copyright (C) 2003, Lucent Technologies Inc. and others. All Rights
|
||||
Reserved.
|
||||
|
||||
C. In addition, each Contributor must identify itself as the
|
||||
originator of its Contribution in a manner that reasonably allows
|
||||
subsequent Recipients to identify the originator of the Contribution.
|
||||
Also, each Contributor must agree that the additions and/or changes
|
||||
are intended to be a Contribution. Once a Contribution is contributed,
|
||||
it may not thereafter be revoked.
|
||||
|
||||
4. COMMERCIAL DISTRIBUTION
|
||||
|
||||
Commercial distributors of software may accept certain
|
||||
responsibilities with respect to end users, business partners and the
|
||||
like. While this license is intended to facilitate the commercial use
|
||||
of the Program, the Distributor who includes the Program in a
|
||||
commercial product offering should do so in a manner which does not
|
||||
create potential liability for Contributors. Therefore, if a
|
||||
Distributor includes the Program in a commercial product offering,
|
||||
such Distributor ("Commercial Distributor") hereby agrees to defend
|
||||
and indemnify every Contributor ("Indemnified Contributor") against
|
||||
any losses, damages and costs (collectively"Losses") arising from
|
||||
claims, lawsuits and other legal actions brought by a third party
|
||||
against the Indemnified Contributor to the extent caused by the acts
|
||||
or omissions of such Commercial Distributor in connection with its
|
||||
distribution of the Program in a commercial product offering. The
|
||||
obligations in this section do not apply to any claims or Losses
|
||||
relating to any actual or alleged intellectual property infringement.
|
||||
In order to qualify, an Indemnified Contributor must: a) promptly
|
||||
notify the Commercial Distributor in writing of such claim, and b)
|
||||
allow the Commercial Distributor to control, and cooperate with the
|
||||
Commercial Distributor in, the defense and any related settlement
|
||||
negotiations. The Indemnified Contributor may participate in any such
|
||||
claim at its own expense.
|
||||
|
||||
For example, a Distributor might include the Program in a commercial
|
||||
product offering, Product X. That Distributor is then a Commercial
|
||||
Distributor. If that Commercial Distributor then makes performance
|
||||
claims, or offers warranties related to Product X, those performance
|
||||
claims and warranties are such Commercial Distributor's responsibility
|
||||
alone. Under this section, the Commercial Distributor would have to
|
||||
defend claims against the Contributors related to those performance
|
||||
claims and warranties, and if a court requires any Contributor to pay
|
||||
any damages as a result, the Commercial Distributor must pay those
|
||||
damages.
|
||||
|
||||
5. NO WARRANTY
|
||||
|
||||
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS
|
||||
PROVIDED ON AN"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY
|
||||
WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY
|
||||
OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely
|
||||
responsible for determining the appropriateness of using and
|
||||
distributing the Program and assumes all risks associated with its
|
||||
exercise of rights under this Agreement, including but not limited to
|
||||
the risks and costs of program errors, compliance with applicable
|
||||
laws, damage to or loss of data, programs or equipment, and
|
||||
unavailability or interruption of operations.
|
||||
|
||||
6. DISCLAIMER OF LIABILITY
|
||||
|
||||
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
|
||||
ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
|
||||
WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
|
||||
DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
|
||||
HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
7. EXPORT CONTROL
|
||||
|
||||
Recipient agrees that Recipient alone is responsible for compliance
|
||||
with the United States export administration regulations (and the
|
||||
export control laws and regulation of any other countries).
|
||||
|
||||
8. GENERAL
|
||||
|
||||
If any provision of this Agreement is invalid or unenforceable under
|
||||
applicable law, it shall not affect the validity or enforceability of
|
||||
the remainder of the terms of this Agreement, and without further
|
||||
action by the parties hereto, such provision shall be reformed to the
|
||||
minimum extent necessary to make such provision valid and enforceable.
|
||||
|
||||
If Recipient institutes patent litigation against a Contributor with
|
||||
respect to a patent applicable to software (including a cross-claim or
|
||||
counterclaim in a lawsuit), then any patent licenses granted by that
|
||||
Contributor to such Recipient under this Agreement shall terminate as
|
||||
of the date such litigation is filed. In addition, if Recipient
|
||||
institutes patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Program
|
||||
itself (excluding combinations of the Program with other software or
|
||||
hardware) infringes such Recipient's patent(s), then such Recipient's
|
||||
rights granted under Section 2(b) shall terminate as of the date such
|
||||
litigation is filed.
|
||||
|
||||
All Recipient's rights under this Agreement shall terminate if it
|
||||
fails to comply with any of the material terms or conditions of this
|
||||
Agreement and does not cure such failure in a reasonable period of
|
||||
time after becoming aware of such noncompliance. If all Recipient's
|
||||
rights under this Agreement terminate, Recipient agrees to cease use
|
||||
and distribution of the Program as soon as reasonably practicable.
|
||||
However, Recipient's obligations under this Agreement and any licenses
|
||||
granted by Recipient relating to the Program shall continue and
|
||||
survive.
|
||||
|
||||
LUCENT may publish new versions (including revisions) of this
|
||||
Agreement from time to time. Each new version of the Agreement will be
|
||||
given a distinguishing version number. The Program (including
|
||||
Contributions) may always be distributed subject to the version of the
|
||||
Agreement under which it was received. In addition, after a new
|
||||
version of the Agreement is published, Contributor may elect to
|
||||
distribute the Program (including its Contributions) under the new
|
||||
version. No one other than LUCENT has the right to modify this
|
||||
Agreement. Except as expressly stated in Sections 2(a) and 2(b) above,
|
||||
Recipient receives no rights or licenses to the intellectual property
|
||||
of any Contributor under this Agreement, whether expressly, by
|
||||
implication, estoppel or otherwise. All rights in the Program not
|
||||
expressly granted under this Agreement are reserved.
|
||||
|
||||
This Agreement is governed by the laws of the State of New York and
|
||||
the intellectual property laws of the United States of America. No
|
||||
party to this Agreement will bring a legal action under this Agreement
|
||||
more than one year after the cause of action arose. Each party waives
|
||||
its rights to a jury trial in any resulting litigation.
|
||||
|
||||
6
src/libregexp/Make.Darwin-PowerMacintosh
Normal file
6
src/libregexp/Make.Darwin-PowerMacintosh
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
CC=gcc
|
||||
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I${PREFIX}/include
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O
|
||||
7
src/libregexp/Make.FreeBSD-386
Normal file
7
src/libregexp/Make.FreeBSD-386
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
CC=gcc
|
||||
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME)
|
||||
NAN=nan64.$O
|
||||
6
src/libregexp/Make.HP-UX-9000
Normal file
6
src/libregexp/Make.HP-UX-9000
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
CC=cc
|
||||
CFLAGS=-O -c -Ae -I.
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O
|
||||
7
src/libregexp/Make.Linux-386
Normal file
7
src/libregexp/Make.Linux-386
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
CC=gcc
|
||||
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I.
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME)
|
||||
NAN=nan64.$O
|
||||
7
src/libregexp/Make.NetBSD-386
Normal file
7
src/libregexp/Make.NetBSD-386
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
CC=gcc
|
||||
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME)
|
||||
NAN=nan64.$O
|
||||
6
src/libregexp/Make.OSF1-alpha
Normal file
6
src/libregexp/Make.OSF1-alpha
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
CC=cc
|
||||
CFLAGS+=-g -c -I.
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O
|
||||
2
src/libregexp/Make.SunOS-sun4u
Normal file
2
src/libregexp/Make.SunOS-sun4u
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
include Make.SunOS-sun4u-$(CC)
|
||||
NAN=nan64.$O
|
||||
6
src/libregexp/Make.SunOS-sun4u-cc
Normal file
6
src/libregexp/Make.SunOS-sun4u-cc
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
CC=cc
|
||||
CFLAGS+=-g -c -I. -O
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O
|
||||
6
src/libregexp/Make.SunOS-sun4u-gcc
Normal file
6
src/libregexp/Make.SunOS-sun4u-gcc
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
CC=gcc
|
||||
CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c
|
||||
O=o
|
||||
AR=ar
|
||||
ARFLAGS=rvc
|
||||
NAN=nan64.$O
|
||||
106
src/libregexp/Makefile
Normal file
106
src/libregexp/Makefile
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
|
||||
# this works in gnu make
|
||||
SYSNAME:=${shell uname}
|
||||
OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'}
|
||||
|
||||
# this works in bsd make
|
||||
SYSNAME!=uname
|
||||
OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'
|
||||
|
||||
# the gnu rules will mess up bsd but not vice versa,
|
||||
# hence the gnu rules come first.
|
||||
|
||||
include Make.$(SYSNAME)-$(OBJTYPE)
|
||||
|
||||
PREFIX=/usr/local
|
||||
|
||||
NUKEFILES=
|
||||
|
||||
TGZFILES=
|
||||
|
||||
LIB=libregexp9.a
|
||||
VERSION=2.0
|
||||
PORTPLACE=devel/libregexp9
|
||||
NAME=libregexp9
|
||||
|
||||
OFILES=\
|
||||
regcomp.$O\
|
||||
regerror.$O\
|
||||
regexec.$O\
|
||||
regsub.$O\
|
||||
regaux.$O\
|
||||
rregsub.$O\
|
||||
rregaux.$O\
|
||||
rregexec.$O\
|
||||
|
||||
HFILES=\
|
||||
regexp9.h\
|
||||
regcomp.h\
|
||||
|
||||
all: $(LIB)
|
||||
|
||||
# Install the man pages, the library and the header under $(PREFIX).
# mkdir -p creates missing parent directories (e.g. $(PREFIX)/man) and is
# a no-op for directories that already exist.
install: $(LIB)
	mkdir -p $(PREFIX)/man/man3 $(PREFIX)/man/man7 $(PREFIX)/lib $(PREFIX)/include
	install -m 0644 regexp9.3 $(PREFIX)/man/man3/regexp9.3
	install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7
	install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
	install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h
|
||||
|
||||
test: test.$O $(LIB)
|
||||
$(CC) -o test test.$O $(LIB) -L/usr/local/lib -lfmt -lutf
|
||||
|
||||
test2: test2.$O $(LIB)
|
||||
$(CC) -o test2 test2.$O $(LIB) -L/usr/local/lib -lfmt -lutf
|
||||
|
||||
$(LIB): $(OFILES)
|
||||
$(AR) $(ARFLAGS) $(LIB) $(OFILES)
|
||||
|
||||
NUKEFILES+=$(LIB)
|
||||
.c.$O:
|
||||
$(CC) $(CFLAGS) -I$(PREFIX)/include $*.c
|
||||
|
||||
%.$O: %.c
|
||||
$(CC) $(CFLAGS) -I$(PREFIX)/include $*.c
|
||||
|
||||
|
||||
$(OFILES): $(HFILES)
|
||||
|
||||
tgz:
|
||||
rm -rf $(NAME)-$(VERSION)
|
||||
mkdir $(NAME)-$(VERSION)
|
||||
cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION)
|
||||
tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz
|
||||
rm -rf $(NAME)-$(VERSION)
|
||||
|
||||
clean:
|
||||
rm -f $(OFILES) $(LIB)
|
||||
|
||||
nuke:
|
||||
rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES)
|
||||
|
||||
rpm:
|
||||
make tgz
|
||||
cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES
|
||||
rpm -ba rpm.spec
|
||||
cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm .
|
||||
cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm .
|
||||
scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software
|
||||
|
||||
PORTDIR=/usr/ports/$(PORTPLACE)
|
||||
|
||||
ports:
|
||||
make tgz
|
||||
rm -rf $(PORTDIR)
|
||||
mkdir $(PORTDIR)
|
||||
cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles
|
||||
cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}')
|
||||
(cd $(PORTDIR); make makesum)
|
||||
(cd $(PORTDIR); make)
|
||||
(cd $(PORTDIR); /usr/local/bin/portlint)
|
||||
rm -rf $(PORTDIR)/work
|
||||
shar `find $(PORTDIR)` > ports.shar
|
||||
(cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz
|
||||
scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software
|
||||
|
||||
# The special target name is case-sensitive: only the upper-case spelling
# marks these targets as not being files.
.PHONY: all clean nuke install tgz rpm ports
|
||||
34
src/libregexp/Makefile.MID
Normal file
34
src/libregexp/Makefile.MID
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
LIB=libregexp9.a
|
||||
VERSION=2.0
|
||||
PORTPLACE=devel/libregexp9
|
||||
NAME=libregexp9
|
||||
|
||||
OFILES=\
|
||||
regcomp.$O\
|
||||
regerror.$O\
|
||||
regexec.$O\
|
||||
regsub.$O\
|
||||
regaux.$O\
|
||||
rregsub.$O\
|
||||
rregaux.$O\
|
||||
|
||||
HFILES=\
|
||||
regexp9.h\
|
||||
regcomp.h\
|
||||
|
||||
all: $(LIB)
|
||||
|
||||
# Install the man pages, the library and the header under $(PREFIX).
# mkdir -p creates missing parent directories (e.g. $(PREFIX)/man) and is
# a no-op for directories that already exist.
install: $(LIB)
	mkdir -p $(PREFIX)/man/man3 $(PREFIX)/man/man7 $(PREFIX)/lib $(PREFIX)/include
	install -m 0644 regexp9.3 $(PREFIX)/man/man3/regexp9.3
	install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7
	install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
	install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h
|
||||
|
||||
test: test.$O $(LIB)
|
||||
$(CC) -o test test.$O $(LIB) -L/usr/local/lib -lfmt -lutf
|
||||
|
||||
test2: test2.$O $(LIB)
|
||||
$(CC) -o test2 test2.$O $(LIB) -L/usr/local/lib -lfmt -lutf
|
||||
|
||||
25
src/libregexp/NOTICE
Normal file
25
src/libregexp/NOTICE
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
|
||||
Portions Copyright © 2000-2002 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
|
||||
|
||||
Under a licence agreement with Lucent Technologies Inc. effective 1st March 2000,
|
||||
Vita Nuova Holdings Limited has the right to determine (within a specified scope)
|
||||
the form and content of sublicences for this software.
|
||||
|
||||
Vita Nuova Holdings Limited now makes this software available as Free
|
||||
Software under the terms of the `GNU Lesser Public License, Version 2.1'
|
||||
(see the file LICENCE or http://www.fsf.org/copyleft/lesser.html for
|
||||
the full terms and conditions). One of the conditions of that licence
|
||||
is that you must keep intact all notices that refer to that licence and to the absence of
|
||||
of any warranty: for this software, note that includes this NOTICE file in particular.
|
||||
|
||||
This suite of programs is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
`GNU General Public License' for more details.
|
||||
|
||||
This copyright NOTICE applies to all files in this directory and
|
||||
subdirectories, unless another copyright notice appears in a given
|
||||
file or subdirectory. If you take code from this software to use in
|
||||
other programs, you must somehow include with it an appropriate
|
||||
copyright notice that includes the copyright notice and the other
|
||||
notices above.
|
||||
7
src/libregexp/README
Normal file
7
src/libregexp/README
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
This is a Unix port of the Plan 9 regular expression library,
|
||||
originally done for the Inferno operating system.
|
||||
|
||||
Russ Cox repackaged this to build as a standalone
|
||||
Unix library. Send comments about packaging to
|
||||
Russ Cox <rsc@post.harvard.edu>
|
||||
|
||||
51
src/libregexp/bundle.ports
Normal file
51
src/libregexp/bundle.ports
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
--- Makefile ---
|
||||
# New ports collection makefile for: libregexp9
|
||||
# Date Created: 11 Feb 2003
|
||||
# Whom: rsc
|
||||
#
|
||||
# THIS LINE NEEDS REPLACING. IT'S HERE TO GET BY PORTLINT
|
||||
# $FreeBSD: ports/devel/libbio/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $
|
||||
|
||||
PORTNAME= libregexp9
|
||||
PORTVERSION= 2.0
|
||||
CATEGORIES= devel
|
||||
MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/
|
||||
EXTRACT_SUFX= .tgz
|
||||
|
||||
MAINTAINER= rsc@post.harvard.edu
|
||||
|
||||
DEPENDS= ${PORTSDIR}/devel/libfmt ${PORTSDIR}/devel/libutf
|
||||
|
||||
MAN3= regexp9.3
|
||||
MAN7= regexp9.7
|
||||
USE_REINPLACE= yes
|
||||
|
||||
.include <bsd.port.pre.mk>
|
||||
|
||||
post-patch:
|
||||
${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile
|
||||
|
||||
.include <bsd.port.post.mk>
|
||||
--- pkg-comment ---
|
||||
Simple regular expression library from Plan 9
|
||||
--- pkg-descr ---
|
||||
Libregexp9 is a port of Plan 9's regexp library.
|
||||
It is small and simple and provides the traditional
|
||||
extended regular expressions (as opposed to the
|
||||
current extended regular expressions, which add {}
|
||||
and various \x character classes, among other
|
||||
complications).
|
||||
|
||||
It handles Unicode in wide character or UTF8 format!
|
||||
|
||||
WWW: http://pdos.lcs.mit.edu/~rsc/software/
|
||||
http://plan9.bell-labs.com/magic/man2html/2/regexp
|
||||
|
||||
Russ Cox
|
||||
rsc@post.harvard.edu
|
||||
--- pkg-plist ---
|
||||
lib/libregexp9.a
|
||||
include/regexp9.h
|
||||
--- /dev/null ---
|
||||
This is just a way to make sure blank lines don't
|
||||
creep into pkg-plist.
|
||||
6
src/libregexp/lib9.h
Normal file
6
src/libregexp/lib9.h
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
#include <fmt.h>
|
||||
#include <setjmp.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
1
src/libregexp/mkfile
Normal file
1
src/libregexp/mkfile
Normal file
|
|
@ -0,0 +1 @@
|
|||
<../libutf/mkfile
|
||||
76
src/libregexp/regaux.c
Normal file
76
src/libregexp/regaux.c
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
|
||||
/*
|
||||
* save a new match in mp
|
||||
*/
|
||||
extern void
|
||||
_renewmatch(Resub *mp, int ms, Resublist *sp)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(mp==0 || ms<=0)
|
||||
return;
|
||||
if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
|
||||
(sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
|
||||
for(i=0; i<ms && i<NSUBEXP; i++)
|
||||
mp[i] = sp->m[i];
|
||||
for(; i<ms; i++)
|
||||
mp[i].s.sp = mp[i].e.ep = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Note optimization in _renewthread:
|
||||
* *lp must be pending when _renewthread called; if *l has been looked
|
||||
* at already, the optimization is a bug.
|
||||
*/
|
||||
extern Relist*
|
||||
_renewthread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
Resublist *sep) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if((sep)->m[0].s.sp < p->se.m[0].s.sp)
|
||||
p->se = *sep;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
p->se = *sep;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* same as renewthread, but called with
|
||||
* initial empty start pointer.
|
||||
*/
|
||||
extern Relist*
|
||||
_renewemptythread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
char *sp) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(sp < p->se.m[0].s.sp) {
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
557
src/libregexp/regcomp.c
Normal file
557
src/libregexp/regcomp.c
Normal file
|
|
@ -0,0 +1,557 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
/*
|
||||
* Parser Information
|
||||
*/
|
||||
typedef
|
||||
struct Node
|
||||
{
|
||||
Reinst* first;
|
||||
Reinst* last;
|
||||
}Node;
|
||||
|
||||
Reprog RePrOg;
|
||||
|
||||
#define NSTACK 20
|
||||
static Node andstack[NSTACK];
|
||||
static Node *andp;
|
||||
static int atorstack[NSTACK];
|
||||
static int* atorp;
|
||||
static int cursubid; /* id of current subexpression */
|
||||
static int subidstack[NSTACK]; /* parallel to atorstack */
|
||||
static int* subidp;
|
||||
static int lastwasand; /* Last token was operand */
|
||||
static int nbra;
|
||||
static char* exprp; /* pointer to next character in source expression */
|
||||
static int lexdone;
|
||||
static int nclass;
|
||||
static Reclass*classp;
|
||||
static Reinst* freep;
|
||||
static int errors;
|
||||
static Rune yyrune; /* last lex'd rune */
|
||||
static Reclass*yyclassp; /* last lex'd class */
|
||||
|
||||
/* predeclared crap */
|
||||
static void operator(int);
|
||||
static void pushand(Reinst*, Reinst*);
|
||||
static void pushator(int);
|
||||
static void evaluntil(int);
|
||||
static int bldcclass(void);
|
||||
|
||||
static jmp_buf regkaboom;
|
||||
|
||||
static void
|
||||
rcerror(char *s)
|
||||
{
|
||||
errors++;
|
||||
regerror(s);
|
||||
longjmp(regkaboom, 1);
|
||||
}
|
||||
|
||||
static Reinst*
|
||||
newinst(int t)
|
||||
{
|
||||
freep->type = t;
|
||||
freep->u2.left = 0;
|
||||
freep->u1.right = 0;
|
||||
return freep++;
|
||||
}
|
||||
|
||||
static void
|
||||
operand(int t)
|
||||
{
|
||||
Reinst *i;
|
||||
|
||||
if(lastwasand)
|
||||
operator(CAT); /* catenate is implicit */
|
||||
i = newinst(t);
|
||||
|
||||
if(t == CCLASS || t == NCCLASS)
|
||||
i->u1.cp = yyclassp;
|
||||
if(t == RUNE)
|
||||
i->u1.r = yyrune;
|
||||
|
||||
pushand(i, i);
|
||||
lastwasand = TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
operator(int t)
|
||||
{
|
||||
if(t==RBRA && --nbra<0)
|
||||
rcerror("unmatched right paren");
|
||||
if(t==LBRA){
|
||||
if(++cursubid >= NSUBEXP)
|
||||
rcerror ("too many subexpressions");
|
||||
nbra++;
|
||||
if(lastwasand)
|
||||
operator(CAT);
|
||||
} else
|
||||
evaluntil(t);
|
||||
if(t != RBRA)
|
||||
pushator(t);
|
||||
lastwasand = FALSE;
|
||||
if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
|
||||
lastwasand = TRUE; /* these look like operands */
|
||||
}
|
||||
|
||||
/*
 * Report the compile error "<s><c>": the message s followed by the
 * single character c.  Ends in rcerror, so it shares rcerror's
 * non-local exit.
 *
 * The copy of s is bounded so that an over-long message is truncated
 * instead of overrunning buf; room is always kept for c and the NUL.
 */
static void
regerr2(char *s, int c)
{
	char buf[100];
	char *cp = buf;
	char *limit = buf + sizeof buf - 2;	/* leave space for c and '\0' */

	while(*s && cp < limit)
		*cp++ = *s++;
	*cp++ = c;
	*cp = '\0';
	rcerror(buf);
}
|
||||
|
||||
/*
 * Report an internal inconsistency as "can't happen: <s>" via rcerror.
 *
 * strncat is given the space left in buf (minus the terminator), so an
 * over-long s is truncated rather than written past the end of buf.
 */
static void
cant(char *s)
{
	char buf[100];

	strcpy(buf, "can't happen: ");
	strncat(buf, s, sizeof buf - strlen(buf) - 1);
	rcerror(buf);
}
|
||||
|
||||
static void
|
||||
pushand(Reinst *f, Reinst *l)
|
||||
{
|
||||
if(andp >= &andstack[NSTACK])
|
||||
cant("operand stack overflow");
|
||||
andp->first = f;
|
||||
andp->last = l;
|
||||
andp++;
|
||||
}
|
||||
|
||||
static void
|
||||
pushator(int t)
|
||||
{
|
||||
if(atorp >= &atorstack[NSTACK])
|
||||
cant("operator stack overflow");
|
||||
*atorp++ = t;
|
||||
*subidp++ = cursubid;
|
||||
}
|
||||
|
||||
static Node*
|
||||
popand(int op)
|
||||
{
|
||||
Reinst *inst;
|
||||
|
||||
if(andp <= &andstack[0]){
|
||||
regerr2("missing operand for ", op);
|
||||
inst = newinst(NOP);
|
||||
pushand(inst,inst);
|
||||
}
|
||||
return --andp;
|
||||
}
|
||||
|
||||
static int
|
||||
popator(void)
|
||||
{
|
||||
if(atorp <= &atorstack[0])
|
||||
cant("operator stack underflow");
|
||||
--subidp;
|
||||
return *--atorp;
|
||||
}
|
||||
|
||||
static void
|
||||
evaluntil(int pri)
|
||||
{
|
||||
Node *op1, *op2;
|
||||
Reinst *inst1, *inst2;
|
||||
|
||||
while(pri==RBRA || atorp[-1]>=pri){
|
||||
switch(popator()){
|
||||
default:
|
||||
rcerror("unknown operator in evaluntil");
|
||||
break;
|
||||
case LBRA: /* must have been RBRA */
|
||||
op1 = popand('(');
|
||||
inst2 = newinst(RBRA);
|
||||
inst2->u1.subid = *subidp;
|
||||
op1->last->u2.next = inst2;
|
||||
inst1 = newinst(LBRA);
|
||||
inst1->u1.subid = *subidp;
|
||||
inst1->u2.next = op1->first;
|
||||
pushand(inst1, inst2);
|
||||
return;
|
||||
case OR:
|
||||
op2 = popand('|');
|
||||
op1 = popand('|');
|
||||
inst2 = newinst(NOP);
|
||||
op2->last->u2.next = inst2;
|
||||
op1->last->u2.next = inst2;
|
||||
inst1 = newinst(OR);
|
||||
inst1->u1.right = op1->first;
|
||||
inst1->u2.left = op2->first;
|
||||
pushand(inst1, inst2);
|
||||
break;
|
||||
case CAT:
|
||||
op2 = popand(0);
|
||||
op1 = popand(0);
|
||||
op1->last->u2.next = op2->first;
|
||||
pushand(op1->first, op2->last);
|
||||
break;
|
||||
case STAR:
|
||||
op2 = popand('*');
|
||||
inst1 = newinst(OR);
|
||||
op2->last->u2.next = inst1;
|
||||
inst1->u1.right = op2->first;
|
||||
pushand(inst1, inst1);
|
||||
break;
|
||||
case PLUS:
|
||||
op2 = popand('+');
|
||||
inst1 = newinst(OR);
|
||||
op2->last->u2.next = inst1;
|
||||
inst1->u1.right = op2->first;
|
||||
pushand(op2->first, inst1);
|
||||
break;
|
||||
case QUEST:
|
||||
op2 = popand('?');
|
||||
inst1 = newinst(OR);
|
||||
inst2 = newinst(NOP);
|
||||
inst1->u2.left = inst2;
|
||||
inst1->u1.right = op2->first;
|
||||
op2->last->u2.next = inst2;
|
||||
pushand(inst1, inst2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Translate p, which pointed into the block that started at from, to
 * the same offset inside the block that now starts at to.  A null
 * pointer stays null.
 */
static void*
relocptr(void *p, char *from, char *to)
{
	if(p == 0)
		return 0;
	return to + ((char*)p - from);
}

/*
 * Post-process a freshly compiled program:
 *
 *  1. make every instruction's u2.next skip over chains of NOPs;
 *  2. shrink the allocation to the space actually used and, if realloc
 *     moved the block, rewrite every internal pointer (instruction
 *     links, class pointers, class end pointers, startinst and the
 *     global freep) to refer to the new block.
 *
 * Returns the (possibly moved) program.  If realloc fails or does not
 * move the block, pp is returned unchanged.
 *
 * The pointer adjustment is done with full pointer-width arithmetic;
 * storing the delta in an int would truncate it on 64-bit systems.
 */
static Reprog*
optimize(Reprog *pp)
{
	Reinst *inst, *target;
	Reprog *npp;
	Reclass *cl;
	char *from, *to;

	/*
	 *  get rid of NOOP chains
	 */
	for(inst=pp->firstinst; inst->type!=END; inst++){
		target = inst->u2.next;
		while(target->type == NOP)
			target = target->u2.next;
		inst->u2.next = target;
	}

	/*
	 *  The original allocation is for an area larger than
	 *  necessary.  Reallocate to the actual space used
	 *  and then relocate the code.
	 */
	npp = (Reprog *)realloc(pp, sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst));
	if(npp==0 || npp==pp)
		return pp;

	/* only the address of the old block is used from here on, never its contents */
	from = (char*)pp;
	to = (char*)npp;
	freep = relocptr(freep, from, to);
	for(inst=npp->firstinst; inst<freep; inst++){
		switch(inst->type){
		case OR:
		case STAR:
		case PLUS:
		case QUEST:
			inst->u1.right = relocptr(inst->u1.right, from, to);
			break;
		case CCLASS:
		case NCCLASS:
			inst->u1.cp = relocptr(inst->u1.cp, from, to);
			cl = inst->u1.cp;
			cl->end = relocptr(cl->end, from, to);
			break;
		}
		inst->u2.left = relocptr(inst->u2.left, from, to);
	}
	npp->startinst = relocptr(npp->startinst, from, to);
	return npp;
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static void
|
||||
dumpstack(void){
|
||||
Node *stk;
|
||||
int *ip;
|
||||
|
||||
print("operators\n");
|
||||
for(ip=atorstack; ip<atorp; ip++)
|
||||
print("0%o\n", *ip);
|
||||
print("operands\n");
|
||||
for(stk=andstack; stk<andp; stk++)
|
||||
print("0%o\t0%o\n", stk->first->type, stk->last->type);
|
||||
}
|
||||
|
||||
static void
|
||||
dump(Reprog *pp)
|
||||
{
|
||||
Reinst *l;
|
||||
Rune *p;
|
||||
|
||||
l = pp->firstinst;
|
||||
do{
|
||||
print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
|
||||
l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
|
||||
if(l->type == RUNE)
|
||||
print("\t%C\n", l->r);
|
||||
else if(l->type == CCLASS || l->type == NCCLASS){
|
||||
print("\t[");
|
||||
if(l->type == NCCLASS)
|
||||
print("^");
|
||||
for(p = l->cp->spans; p < l->cp->end; p += 2)
|
||||
if(p[0] == p[1])
|
||||
print("%C", p[0]);
|
||||
else
|
||||
print("%C-%C", p[0], p[1]);
|
||||
print("]\n");
|
||||
} else
|
||||
print("\n");
|
||||
}while(l++->type);
|
||||
}
|
||||
#endif
|
||||
|
||||
static Reclass*
|
||||
newclass(void)
|
||||
{
|
||||
if(nclass >= NCLASS)
|
||||
regerr2("too many character classes; limit", NCLASS+'0');
|
||||
return &(classp[nclass++]);
|
||||
}
|
||||
|
||||
static int
|
||||
nextc(Rune *rp)
|
||||
{
|
||||
if(lexdone){
|
||||
*rp = 0;
|
||||
return 1;
|
||||
}
|
||||
exprp += chartorune(rp, exprp);
|
||||
if(*rp == L'\\'){
|
||||
exprp += chartorune(rp, exprp);
|
||||
return 1;
|
||||
}
|
||||
if(*rp == 0)
|
||||
lexdone = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
lex(int literal, int dot_type)
|
||||
{
|
||||
int quoted;
|
||||
|
||||
quoted = nextc(&yyrune);
|
||||
if(literal || quoted){
|
||||
if(yyrune == 0)
|
||||
return END;
|
||||
return RUNE;
|
||||
}
|
||||
|
||||
switch(yyrune){
|
||||
case 0:
|
||||
return END;
|
||||
case L'*':
|
||||
return STAR;
|
||||
case L'?':
|
||||
return QUEST;
|
||||
case L'+':
|
||||
return PLUS;
|
||||
case L'|':
|
||||
return OR;
|
||||
case L'.':
|
||||
return dot_type;
|
||||
case L'(':
|
||||
return LBRA;
|
||||
case L')':
|
||||
return RBRA;
|
||||
case L'^':
|
||||
return BOL;
|
||||
case L'$':
|
||||
return EOL;
|
||||
case L'[':
|
||||
return bldcclass();
|
||||
}
|
||||
return RUNE;
|
||||
}
|
||||
|
||||
static int
|
||||
bldcclass(void)
|
||||
{
|
||||
int type;
|
||||
Rune r[NCCRUNE];
|
||||
Rune *p, *ep, *np;
|
||||
Rune rune;
|
||||
int quoted;
|
||||
|
||||
/* we have already seen the '[' */
|
||||
type = CCLASS;
|
||||
yyclassp = newclass();
|
||||
|
||||
/* look ahead for negation */
|
||||
/* SPECIAL CASE!!! negated classes don't match \n */
|
||||
ep = r;
|
||||
quoted = nextc(&rune);
|
||||
if(!quoted && rune == L'^'){
|
||||
type = NCCLASS;
|
||||
quoted = nextc(&rune);
|
||||
*ep++ = L'\n';
|
||||
*ep++ = L'\n';
|
||||
}
|
||||
|
||||
/* parse class into a set of spans */
|
||||
for(; ep<&r[NCCRUNE];){
|
||||
if(rune == 0){
|
||||
rcerror("malformed '[]'");
|
||||
return 0;
|
||||
}
|
||||
if(!quoted && rune == L']')
|
||||
break;
|
||||
if(!quoted && rune == L'-'){
|
||||
if(ep == r){
|
||||
rcerror("malformed '[]'");
|
||||
return 0;
|
||||
}
|
||||
quoted = nextc(&rune);
|
||||
if((!quoted && rune == L']') || rune == 0){
|
||||
rcerror("malformed '[]'");
|
||||
return 0;
|
||||
}
|
||||
*(ep-1) = rune;
|
||||
} else {
|
||||
*ep++ = rune;
|
||||
*ep++ = rune;
|
||||
}
|
||||
quoted = nextc(&rune);
|
||||
}
|
||||
|
||||
/* sort on span start */
|
||||
for(p = r; p < ep; p += 2){
|
||||
for(np = p; np < ep; np += 2)
|
||||
if(*np < *p){
|
||||
rune = np[0];
|
||||
np[0] = p[0];
|
||||
p[0] = rune;
|
||||
rune = np[1];
|
||||
np[1] = p[1];
|
||||
p[1] = rune;
|
||||
}
|
||||
}
|
||||
|
||||
/* merge spans */
|
||||
np = yyclassp->spans;
|
||||
p = r;
|
||||
if(r == ep)
|
||||
yyclassp->end = np;
|
||||
else {
|
||||
np[0] = *p++;
|
||||
np[1] = *p++;
|
||||
for(; p < ep; p += 2)
|
||||
if(p[0] <= np[1]){
|
||||
if(p[1] > np[1])
|
||||
np[1] = p[1];
|
||||
} else {
|
||||
np += 2;
|
||||
np[0] = p[0];
|
||||
np[1] = p[1];
|
||||
}
|
||||
yyclassp->end = np+2;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
static Reprog*
|
||||
regcomp1(char *s, int literal, int dot_type)
|
||||
{
|
||||
int token;
|
||||
Reprog *pp;
|
||||
|
||||
/* get memory for the program */
|
||||
pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
|
||||
if(pp == 0){
|
||||
regerror("out of memory");
|
||||
return 0;
|
||||
}
|
||||
freep = pp->firstinst;
|
||||
classp = pp->class;
|
||||
errors = 0;
|
||||
|
||||
if(setjmp(regkaboom))
|
||||
goto out;
|
||||
|
||||
/* go compile the sucker */
|
||||
lexdone = 0;
|
||||
exprp = s;
|
||||
nclass = 0;
|
||||
nbra = 0;
|
||||
atorp = atorstack;
|
||||
andp = andstack;
|
||||
subidp = subidstack;
|
||||
lastwasand = FALSE;
|
||||
cursubid = 0;
|
||||
|
||||
/* Start with a low priority operator to prime parser */
|
||||
pushator(START-1);
|
||||
while((token = lex(literal, dot_type)) != END){
|
||||
if((token&0300) == OPERATOR)
|
||||
operator(token);
|
||||
else
|
||||
operand(token);
|
||||
}
|
||||
|
||||
/* Close with a low priority operator */
|
||||
evaluntil(START);
|
||||
|
||||
/* Force END */
|
||||
operand(END);
|
||||
evaluntil(START);
|
||||
#ifdef DEBUG
|
||||
dumpstack();
|
||||
#endif
|
||||
if(nbra)
|
||||
rcerror("unmatched left paren");
|
||||
--andp; /* points to first and only operand */
|
||||
pp->startinst = andp->first;
|
||||
#ifdef DEBUG
|
||||
dump(pp);
|
||||
#endif
|
||||
pp = optimize(pp);
|
||||
#ifdef DEBUG
|
||||
print("start: %d\n", andp->first-pp->firstinst);
|
||||
dump(pp);
|
||||
#endif
|
||||
out:
|
||||
if(errors){
|
||||
free(pp);
|
||||
pp = 0;
|
||||
}
|
||||
return pp;
|
||||
}
|
||||
|
||||
extern Reprog*
|
||||
regcomp(char *s)
|
||||
{
|
||||
return regcomp1(s, 0, ANY);
|
||||
}
|
||||
|
||||
extern Reprog*
|
||||
regcomplit(char *s)
|
||||
{
|
||||
return regcomp1(s, 1, ANY);
|
||||
}
|
||||
|
||||
extern Reprog*
|
||||
regcompnl(char *s)
|
||||
{
|
||||
return regcomp1(s, 0, ANYNL);
|
||||
}
|
||||
74
src/libregexp/regcomp.h
Normal file
74
src/libregexp/regcomp.h
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* substitution list
|
||||
*/
|
||||
#define uchar __reuchar
|
||||
typedef unsigned char uchar;
|
||||
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
||||
#define NSUBEXP 32
|
||||
/*
 * A complete set of subexpression match slots: one Resub for each of the
 * NSUBEXP possible subexpression ids.  The matcher indexes m[] by an
 * instruction's subid, and uses m[0] for the whole match.
 */
typedef struct Resublist Resublist;
|
||||
struct Resublist
|
||||
{
|
||||
Resub m[NSUBEXP];
|
||||
};
|
||||
|
||||
/* max character classes per program */
|
||||
extern Reprog RePrOg;
|
||||
#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass))
|
||||
|
||||
/* max rune ranges per character class */
|
||||
#define NCCRUNE (sizeof(Reclass)/sizeof(Rune))
|
||||
|
||||
/*
|
||||
* Actions and Tokens (Reinst types)
|
||||
*
|
||||
* 02xx are operators, value == precedence
|
||||
* 03xx are tokens, i.e. operands for operators
|
||||
*/
|
||||
#define RUNE 0177
|
||||
#define OPERATOR 0200 /* Bitmask of all operators */
|
||||
#define START 0200 /* Start, used for marker on stack */
|
||||
#define RBRA 0201 /* Right bracket, ) */
|
||||
#define LBRA 0202 /* Left bracket, ( */
|
||||
#define OR 0203 /* Alternation, | */
|
||||
#define CAT 0204 /* Concatenation, implicit operator */
|
||||
#define STAR 0205 /* Closure, * */
|
||||
#define PLUS 0206 /* a+ == aa* */
|
||||
#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */
|
||||
#define ANY 0300 /* Any character except newline, . */
|
||||
#define ANYNL 0301 /* Any character including newline, . */
|
||||
#define NOP 0302 /* No operation, internal use only */
|
||||
#define BOL 0303 /* Beginning of line, ^ */
|
||||
#define EOL 0304 /* End of line, $ */
|
||||
#define CCLASS 0305 /* Character class, [] */
|
||||
#define NCCLASS 0306 /* Negated character class, [] */
|
||||
#define END 0377 /* Terminate: match found */
|
||||
|
||||
/*
|
||||
* regexec execution lists
|
||||
*/
|
||||
#define LISTSIZE 10
|
||||
#define BIGLISTSIZE (10*LISTSIZE)
|
||||
/*
 * One entry of a thread list used while executing a program.
 * A list is an array of these, terminated by an entry whose inst is 0.
 */
typedef struct Relist Relist;
|
||||
struct Relist
|
||||
{
|
||||
Reinst* inst; /* Reinstruction of the thread */
|
||||
Resublist se; /* matched subexpressions in this thread */
|
||||
};
|
||||
/*
 * Per-call matcher state shared between regexec() and its helpers.
 */
typedef struct Reljunk Reljunk;
|
||||
struct Reljunk
|
||||
{
|
||||
Relist* relist[2]; /* the two thread lists the matcher alternates between */
|
||||
Relist* reliste[2]; /* limit of each list; a new thread landing here means the list is full */
|
||||
int starttype; /* 0, RUNE or BOL: enables the fast scan for a possible match start */
|
||||
Rune startchar; /* rune to scan for when starttype is RUNE */
|
||||
char* starts; /* where matching starts in a char string */
|
||||
char* eol; /* where matching stops in a char string; 0 if not given */
|
||||
Rune* rstarts; /* where matching starts in a Rune string */
|
||||
Rune* reol; /* presumably the Rune-string counterpart of eol -- TODO confirm */
|
||||
};
|
||||
|
||||
extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
|
||||
extern void _renewmatch(Resub*, int, Resublist*);
|
||||
extern Relist* _renewemptythread(Relist*, Reinst*, char*);
|
||||
extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*);
|
||||
14
src/libregexp/regerror.c
Normal file
14
src/libregexp/regerror.c
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
|
||||
/*
 * Default error handler: writes "regerror: ", the message s and a
 * newline to file descriptor 2 in a single write, then exits with
 * status 1.
 *
 * The message is assembled in a fixed buffer.  s is truncated if
 * necessary so that the prefix, the newline and the terminating NUL
 * always fit.
 */
void
regerror(char *s)
{
	static const char prefix[] = "regerror: ";
	char buf[132];

	strcpy(buf, prefix);
	/* sizeof prefix counts its NUL; one more byte is kept for the newline */
	strncat(buf, s, sizeof buf - sizeof prefix - 1);
	strcat(buf, "\n");
	write(2, buf, strlen(buf));
	exit(1);
}
|
||||
219
src/libregexp/regexec.c
Normal file
219
src/libregexp/regexec.c
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
|
||||
/*
|
||||
* return 0 if no match
|
||||
* >0 if a match
|
||||
* <0 if we ran out of _relist space
|
||||
*/
|
||||
static int
|
||||
regexec1(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
int flag=0;
|
||||
Reinst *inst;
|
||||
Relist *tlp;
|
||||
char *s;
|
||||
int i, checkstart;
|
||||
Rune r, *rp, *ep;
|
||||
int n;
|
||||
Relist* tl; /* This list, next list */
|
||||
Relist* nl;
|
||||
Relist* tle; /* ends of this and next list */
|
||||
Relist* nle;
|
||||
int match;
|
||||
char *p;
|
||||
|
||||
match = 0;
|
||||
checkstart = j->starttype;
|
||||
if(mp)
|
||||
for(i=0; i<ms; i++) {
|
||||
mp[i].s.sp = 0;
|
||||
mp[i].e.ep = 0;
|
||||
}
|
||||
j->relist[0][0].inst = 0;
|
||||
j->relist[1][0].inst = 0;
|
||||
|
||||
/* Execute machine once for each character, including terminal NUL */
|
||||
s = j->starts;
|
||||
do{
|
||||
/* fast check for first char */
|
||||
if(checkstart) {
|
||||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = utfrune(s, j->startchar);
|
||||
if(p == 0)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol)
|
||||
break;
|
||||
p = utfrune(s, '\n');
|
||||
if(p == 0)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
r = *(uchar*)s;
|
||||
if(r < (Rune)Runeself)
|
||||
n = 1;
|
||||
else
|
||||
n = chartorune(&r, s);
|
||||
|
||||
/* switch run lists */
|
||||
tl = j->relist[flag];
|
||||
tle = j->reliste[flag];
|
||||
nl = j->relist[flag^=1];
|
||||
nle = j->reliste[flag];
|
||||
nl->inst = 0;
|
||||
|
||||
/* Add first instruction to current list */
|
||||
if(match == 0)
|
||||
_renewemptythread(tl, progp->startinst, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
|
||||
for(inst = tlp->inst; ; inst = inst->u2.next){
|
||||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->u1.r == r){
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case LBRA:
|
||||
tlp->se.m[inst->u1.subid].s.sp = s;
|
||||
continue;
|
||||
case RBRA:
|
||||
tlp->se.m[inst->u1.subid].e.ep = s;
|
||||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
if(s == bol || *(s-1) == '\n')
|
||||
continue;
|
||||
break;
|
||||
case EOL:
|
||||
if(s == j->eol || r == 0 || r == '\n')
|
||||
continue;
|
||||
break;
|
||||
case CCLASS:
|
||||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case NCCLASS:
|
||||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
case END: /* Match! */
|
||||
match = 1;
|
||||
tlp->se.m[0].e.ep = s;
|
||||
if(mp != 0)
|
||||
_renewmatch(mp, ms, &tlp->se);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(s == j->eol)
|
||||
break;
|
||||
checkstart = j->starttype && nl->inst==0;
|
||||
s += n;
|
||||
}while(r);
|
||||
return match;
|
||||
}
|
||||
|
||||
static int
|
||||
regexec2(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE];
|
||||
|
||||
/* mark space */
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j->reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
return regexec1(progp, bol, mp, ms, j);
|
||||
}
|
||||
|
||||
extern int
|
||||
regexec(Reprog *progp, /* program to run */
|
||||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements at mp */
|
||||
{
|
||||
Reljunk j;
|
||||
Relist relist0[LISTSIZE], relist1[LISTSIZE];
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* use user-specified starting/ending location if specified
|
||||
*/
|
||||
j.starts = bol;
|
||||
j.eol = 0;
|
||||
if(mp && ms>0){
|
||||
if(mp->s.sp)
|
||||
j.starts = mp->s.sp;
|
||||
if(mp->e.ep)
|
||||
j.eol = mp->e.ep;
|
||||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->u1.r;
|
||||
}
|
||||
if(progp->startinst->type == BOL)
|
||||
j.starttype = BOL;
|
||||
|
||||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = regexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
rv = regexec2(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
return -1;
|
||||
}
|
||||
227
src/libregexp/regexp9.3
Normal file
227
src/libregexp/regexp9.3
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
.TH REGEXP9 3
|
||||
.de EX
|
||||
.nf
|
||||
.ft B
|
||||
..
|
||||
.de EE
|
||||
.fi
|
||||
.ft R
|
||||
..
|
||||
.de LR
|
||||
.if t .BR \\$1 \\$2
|
||||
.if n .RB ` \\$1 '\\$2
|
||||
..
|
||||
.de L
|
||||
.nh
|
||||
.if t .B \\$1
|
||||
.if n .RB ` \\$1 '
|
||||
..
|
||||
.SH NAME
|
||||
regcomp, regcomplit, regcompnl, regexec, regsub, regerror \- Plan 9 regular expression library
|
||||
.SH SYNOPSIS
|
||||
.B #include <regexp9.h>
|
||||
.PP
|
||||
.ta \w'\fLRegprog 'u
|
||||
.B
|
||||
Reprog *regcomp(char *exp)
|
||||
.PP
|
||||
.B
|
||||
Reprog *regcomplit(char *exp)
|
||||
.PP
|
||||
.B
|
||||
Reprog *regcompnl(char *exp)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
int regexec(Reprog *prog, char *string, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
void regsub(char *source, char *dest, int dlen, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
int rregexec(Reprog *prog, Rune *string, Resub *match, int msize)
|
||||
.PP
|
||||
.nf
|
||||
.B
|
||||
void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize)
|
||||
.PP
|
||||
.B
|
||||
void regerror(char *msg)
|
||||
.SH DESCRIPTION
|
||||
.I Regcomp
|
||||
compiles a
|
||||
regular expression and returns
|
||||
a pointer to the generated description.
|
||||
The space is allocated by
|
||||
.IR malloc (3)
|
||||
and may be released by
|
||||
.IR free .
|
||||
Regular expressions are exactly as in
|
||||
.IR regexp9 (7).
|
||||
.PP
|
||||
.I Regcomplit
|
||||
is like
|
||||
.I regcomp
|
||||
except that all characters are treated literally.
|
||||
.I Regcompnl
|
||||
is like
|
||||
.I regcomp
|
||||
except that the
|
||||
.B .
|
||||
metacharacter matches all characters, including newlines.
|
||||
.PP
|
||||
.I Regexec
|
||||
matches a null-terminated
|
||||
.I string
|
||||
against the compiled regular expression in
|
||||
.IR prog .
|
||||
If it matches,
|
||||
.I regexec
|
||||
returns
|
||||
.B 1
|
||||
and fills in the array
|
||||
.I match
|
||||
with character pointers to the substrings of
|
||||
.I string
|
||||
that correspond to the
|
||||
parenthesized subexpressions of
|
||||
.IR exp :
|
||||
.BI match[ i ].s.sp
|
||||
points to the beginning and
|
||||
.BI match[ i ].e.ep
|
||||
points just beyond
|
||||
the end of the
|
||||
.IR i th
|
||||
substring.
|
||||
(Subexpression
|
||||
.I i
|
||||
begins at the
|
||||
.IR i th
|
||||
left parenthesis, counting from 1.)
|
||||
Pointers in
|
||||
.B match[0]
|
||||
pick out the substring that corresponds to
|
||||
the whole regular expression.
|
||||
Unused elements of
|
||||
.I match
|
||||
are filled with zeros.
|
||||
Matches involving
|
||||
.LR * ,
|
||||
.LR + ,
|
||||
and
|
||||
.L ?
|
||||
are extended as far as possible.
|
||||
The number of array elements in
|
||||
.I match
|
||||
is given by
|
||||
.IR msize .
|
||||
The structure of elements of
|
||||
.I match
|
||||
is:
|
||||
.IP
|
||||
.EX
|
||||
typedef struct {
|
||||
union {
|
||||
char *sp;
|
||||
Rune *rsp;
|
||||
} s;
|
||||
union {
|
||||
char *ep;
|
||||
Rune *rep;
|
||||
} e;
|
||||
} Resub;
|
||||
.EE
|
||||
.LP
|
||||
If
|
||||
.B match[0].s.sp
|
||||
is nonzero on entry,
|
||||
.I regexec
|
||||
starts matching at that point within
|
||||
.IR string .
|
||||
If
|
||||
.B match[0].e.ep
|
||||
is nonzero on entry,
|
||||
the last character matched is the one
|
||||
preceding that point.
|
||||
.PP
|
||||
.I Regsub
|
||||
places in
|
||||
.I dest
|
||||
a substitution instance of
|
||||
.I source
|
||||
in the context of the last
|
||||
.I regexec
|
||||
performed using
|
||||
.IR match .
|
||||
Each instance of
|
||||
.BI \e n\f1,
|
||||
where
|
||||
.I n
|
||||
is a digit, is replaced by the
|
||||
string delimited by
|
||||
.BI match[ n ].s.sp
|
||||
and
|
||||
.BI match[ n ].e.ep\f1.
|
||||
Each instance of
|
||||
.L &
|
||||
is replaced by the string delimited by
|
||||
.B match[0].s.sp
|
||||
and
|
||||
.BR match[0].e.ep .
|
||||
The substitution will always be null terminated and
|
||||
trimmed to fit into dlen bytes.
|
||||
.PP
|
||||
.IR Regerror ,
|
||||
called whenever an error is detected in
|
||||
.IR regcomp ,
|
||||
writes the string
|
||||
.I msg
|
||||
on the standard error file and exits.
|
||||
.I Regerror
|
||||
can be replaced to perform
|
||||
special error processing.
|
||||
If the user supplied
|
||||
.I regerror
|
||||
returns rather than exits,
|
||||
.I regcomp
|
||||
will return 0.
|
||||
.PP
|
||||
.I Rregexec
|
||||
and
|
||||
.I rregsub
|
||||
are variants of
|
||||
.I regexec
|
||||
and
|
||||
.I regsub
|
||||
that use strings of
|
||||
.B Runes
|
||||
instead of strings of
|
||||
.BR chars .
|
||||
With these routines, the
|
||||
.I rsp
|
||||
and
|
||||
.I rep
|
||||
fields of the
|
||||
.I match
|
||||
array elements should be used.
|
||||
.SH "SEE ALSO"
|
||||
.IR grep (1),
|
||||
.IR regexp9 (7)
|
||||
.SH DIAGNOSTICS
|
||||
.I Regcomp
|
||||
returns
|
||||
.B 0
|
||||
for an illegal expression
|
||||
or other failure.
|
||||
.I Regexec
|
||||
returns 0
|
||||
if
|
||||
.I string
|
||||
is not matched.
|
||||
.SH HISTORY
|
||||
This particular regular expression library was first written by Rob Pike for Plan 9.
|
||||
It has also appeared as part of the Inferno operating system.
|
||||
.SH BUGS
|
||||
There is no way to specify or match a NUL character; NULs terminate patterns and strings.
|
||||
150
src/libregexp/regexp9.7
Normal file
150
src/libregexp/regexp9.7
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
.TH REGEXP9 7
|
||||
.de EX
|
||||
.nf
|
||||
.ft B
|
||||
..
|
||||
.de EE
|
||||
.fi
|
||||
.ft R
|
||||
..
|
||||
.de LR
|
||||
.if t .BR \\$1 \\$2
|
||||
.if n .RB ` \\$1 '\\$2
|
||||
..
|
||||
.de L
|
||||
.nh
|
||||
.if t .B \\$1
|
||||
.if n .RB ` \\$1 '
|
||||
..
|
||||
.SH NAME
|
||||
regexp9 \- Plan 9 regular expression notation
|
||||
.SH DESCRIPTION
|
||||
This manual page describes the regular expression
|
||||
syntax used by the Plan 9 regular expression library
|
||||
.IR regexp9 (3).
|
||||
It is the form used by
|
||||
.IR egrep (1)
|
||||
before
|
||||
.I egrep
|
||||
got complicated.
|
||||
.PP
|
||||
A
|
||||
.I "regular expression"
|
||||
specifies
|
||||
a set of strings of characters.
|
||||
A member of this set of strings is said to be
|
||||
.I matched
|
||||
by the regular expression. In many applications
|
||||
a delimiter character, commonly
|
||||
.LR / ,
|
||||
bounds a regular expression.
|
||||
In the following specification for regular expressions
|
||||
the word `character' means any character (rune) but newline.
|
||||
.PP
|
||||
The syntax for a regular expression
|
||||
.B e0
|
||||
is
|
||||
.IP
|
||||
.EX
|
||||
e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')'
|
||||
|
||||
e2: e3
|
||||
| e2 REP
|
||||
|
||||
REP: '*' | '+' | '?'
|
||||
|
||||
e1: e2
|
||||
| e1 e2
|
||||
|
||||
e0: e1
|
||||
| e0 '|' e1
|
||||
.EE
|
||||
.PP
|
||||
A
|
||||
.B literal
|
||||
is any non-metacharacter, or a metacharacter
|
||||
(one of
|
||||
.BR .*+?[]()|\e^$ ),
|
||||
or the delimiter
|
||||
preceded by
|
||||
.LR \e .
|
||||
.PP
|
||||
A
|
||||
.B charclass
|
||||
is a nonempty string
|
||||
.I s
|
||||
bracketed
|
||||
.BI [ \|s\| ]
|
||||
(or
|
||||
.BI [^ s\| ]\fR);
|
||||
it matches any character in (or not in)
|
||||
.IR s .
|
||||
A negated character class never
|
||||
matches newline.
|
||||
A substring
|
||||
.IB a - b\f1,
|
||||
with
|
||||
.I a
|
||||
and
|
||||
.I b
|
||||
in ascending
|
||||
order, stands for the inclusive
|
||||
range of
|
||||
characters between
|
||||
.I a
|
||||
and
|
||||
.IR b .
|
||||
In
|
||||
.IR s ,
|
||||
the metacharacters
|
||||
.LR - ,
|
||||
.LR ] ,
|
||||
an initial
|
||||
.LR ^ ,
|
||||
and the regular expression delimiter
|
||||
must be preceded by a
|
||||
.LR \e ;
|
||||
other metacharacters
|
||||
have no special meaning and
|
||||
may appear unescaped.
|
||||
.PP
|
||||
A
|
||||
.L .
|
||||
matches any character.
|
||||
.PP
|
||||
A
|
||||
.L ^
|
||||
matches the beginning of a line;
|
||||
.L $
|
||||
matches the end of the line.
|
||||
.PP
|
||||
The
|
||||
.B REP
|
||||
operators match zero or more
|
||||
.RB ( * ),
|
||||
one or more
|
||||
.RB ( + ),
|
||||
zero or one
|
||||
.RB ( ? ),
|
||||
instances respectively of the preceding regular expression
|
||||
.BR e2 .
|
||||
.PP
|
||||
A concatenated regular expression,
|
||||
.BR "e1\|e2" ,
|
||||
matches a match to
|
||||
.B e1
|
||||
followed by a match to
|
||||
.BR e2 .
|
||||
.PP
|
||||
An alternative regular expression,
|
||||
.BR "e0\||\|e1" ,
|
||||
matches either a match to
|
||||
.B e0
|
||||
or a match to
|
||||
.BR e1 .
|
||||
.PP
|
||||
A match to any part of a regular expression
|
||||
extends as far as possible without preventing
|
||||
a match to the remainder of the regular expression.
|
||||
.SH "SEE ALSO"
|
||||
.IR regexp9 (3)
|
||||
71
src/libregexp/regexp9.h
Normal file
71
src/libregexp/regexp9.h
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#ifndef _REGEXP9H_
|
||||
|
||||
#define _REGEXP9H_ 1
|
||||
#include <utf.h>
|
||||
|
||||
typedef struct Resub Resub;
|
||||
typedef struct Reclass Reclass;
|
||||
typedef struct Reinst Reinst;
|
||||
typedef struct Reprog Reprog;
|
||||
|
||||
/*
 * Sub expression matches.
 * s is the start and e the end of one matched substring.  The char*
 * members (sp, ep) are used when matching char strings, the Rune*
 * members (rsp, rep) when matching Rune strings.
 */
|
||||
struct Resub{
|
||||
union
|
||||
{
|
||||
char *sp;
|
||||
Rune *rsp;
|
||||
}s;
|
||||
union
|
||||
{
|
||||
char *ep;
|
||||
Rune *rep;
|
||||
}e;
|
||||
};
|
||||
|
||||
/*
 * Character class.  spans holds pairs of runes; each pair is an
 * inclusive range [lo, hi].  end points just past the last pair in use,
 * or at spans itself when the class is empty.
 */
|
||||
struct Reclass{
|
||||
Rune *end;
|
||||
Rune spans[64];
|
||||
};
|
||||
|
||||
/*
 * Machine instructions.
 * type selects which member of u1 is meaningful; u2 links the
 * instruction to its successor (or, for OR, to its left branch).
 */
|
||||
struct Reinst{
|
||||
int type;
|
||||
union {
|
||||
Reclass *cp; /* class pointer */
|
||||
Rune r; /* character */
|
||||
int subid; /* sub-expression id for RBRA and LBRA */
|
||||
Reinst *right; /* right child of OR */
|
||||
}u1;
|
||||
union { /* regexp relies on these two being in the same union */
|
||||
Reinst *left; /* left child of OR */
|
||||
Reinst *next; /* next instruction for CAT & LBRA */
|
||||
}u2;
|
||||
};
|
||||
|
||||
/*
 * Reprogram definition: a compiled regular expression.
 * The compiler allocates room for more instructions than firstinst
 * declares, directly after the structure, so firstinst is only the
 * start of the instruction array.
 */
|
||||
struct Reprog{
|
||||
Reinst *startinst; /* start pc */
|
||||
Reclass class[16]; /* .data */
|
||||
Reinst firstinst[5]; /* .text */
|
||||
};
|
||||
|
||||
extern Reprog *regcomp(char*);
|
||||
extern Reprog *regcomplit(char*);
|
||||
extern Reprog *regcompnl(char*);
|
||||
extern void regerror(char*);
|
||||
extern int regexec(Reprog*, char*, Resub*, int);
|
||||
extern void regsub(char*, char*, int, Resub*, int);
|
||||
|
||||
extern int rregexec(Reprog*, Rune*, Resub*, int);
|
||||
/* NOTE(review): regsub() above takes a destination length (int) as its third argument but this prototype has none -- confirm it matches the rregsub definition. */
extern void rregsub(Rune*, Rune*, Resub*, int);
|
||||
|
||||
#endif
|
||||
62
src/libregexp/regsub.c
Normal file
62
src/libregexp/regsub.c
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
|
||||
/* substitute into one string using the matches from the last regexec() */
|
||||
extern void
|
||||
regsub(char *sp, /* source string */
|
||||
char *dp, /* destination string */
|
||||
int dlen,
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements pointed to by mp */
|
||||
{
|
||||
char *ssp, *ep;
|
||||
int i;
|
||||
|
||||
ep = dp+dlen-1;
|
||||
while(*sp != '\0'){
|
||||
if(*sp == '\\'){
|
||||
switch(*++sp){
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
i = *sp-'0';
|
||||
if(mp[i].s.sp != 0 && mp!=0 && ms>i)
|
||||
for(ssp = mp[i].s.sp;
|
||||
ssp < mp[i].e.ep;
|
||||
ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
break;
|
||||
case '\\':
|
||||
if(dp < ep)
|
||||
*dp++ = '\\';
|
||||
break;
|
||||
case '\0':
|
||||
sp--;
|
||||
break;
|
||||
default:
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
}
|
||||
}else if(*sp == '&'){
|
||||
if(mp[0].s.sp != 0 && mp!=0 && ms>0)
|
||||
if(mp[0].s.sp != 0)
|
||||
for(ssp = mp[0].s.sp;
|
||||
ssp < mp[0].e.ep; ssp++)
|
||||
if(dp < ep)
|
||||
*dp++ = *ssp;
|
||||
}else
|
||||
if(dp < ep)
|
||||
*dp++ = *sp;
|
||||
sp++;
|
||||
}
|
||||
*dp = '\0';
|
||||
}
|
||||
34
src/libregexp/rpm.spec
Normal file
34
src/libregexp/rpm.spec
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
Summary: Simple regular expression library from Plan 9
|
||||
Name: libregexp9
|
||||
Version: 2.0
|
||||
Release: 1
|
||||
Group: Development/C
|
||||
Copyright: Public Domain
|
||||
Packager: Russ Cox <rsc@post.harvard.edu>
|
||||
Source: http://pdos.lcs.mit.edu/~rsc/software/libregexp9-2.0.tgz
|
||||
URL: http://pdos.lcs.mit.edu/~rsc/software/#libregexp9
|
||||
Requires: libfmt libutf
|
||||
|
||||
%description
|
||||
Libregexp9 is a port of Plan 9's regexp library.
|
||||
It is small and simple and provides the traditional
|
||||
extended regular expressions (as opposed to the
|
||||
current extended regular expressions, which add {}
|
||||
and various \x character classes, among other
|
||||
complications).
|
||||
|
||||
http://plan9.bell-labs.com/magic/man2html/2/regexp
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
make
|
||||
|
||||
%install
|
||||
make install
|
||||
|
||||
%files
|
||||
/usr/local/include/regexp9.h
|
||||
/usr/local/lib/libregexp9.a
|
||||
/usr/local/man/man3/regexp9.3
|
||||
/usr/local/man/man7/regexp9.7
|
||||
26
src/libregexp/rregaux.c
Normal file
26
src/libregexp/rregaux.c
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
extern Relist*
|
||||
_rrenewemptythread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
Rune *rsp) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(rsp < p->se.m[0].s.rsp) {
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
memset((void *)&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
213
src/libregexp/rregexec.c
Normal file
213
src/libregexp/rregexec.c
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
/*
 * Run the compiled program progp over the rune string that starts at
 * j->rstarts, using the two thread lists supplied in j.
 *
 * One pass of the outer loop is made per rune.  tl holds the threads
 * to run against the current rune; threads that accept it are queued
 * on nl for the next rune.  The two lists swap roles on every pass.
 *
 *  return	0 if no match
 *		>0 if a match
 *		<0 if we ran out of _relist space
 */
static int
rregexec1(Reprog *progp,	/* program to run */
	Rune *bol,		/* string to run machine on */
	Resub *mp,		/* subexpression elements */
	int ms,			/* number of elements at mp */
	Reljunk *j)
{
	int flag=0;		/* index of the list currently used as tl */
	Reinst *inst;
	Relist *tlp;
	Rune *s;
	int i, checkstart;
	Rune r, *rp, *ep;
	Relist* tl;		/* This list, next list */
	Relist* nl;
	Relist* tle;		/* ends of this and next list */
	Relist* nle;
	int match;

	match = 0;
	checkstart = j->startchar;
	/* clear the caller's match array before running */
	if(mp)
		for(i=0; i<ms; i++) {
			mp[i].s.rsp = 0;
			mp[i].e.rep = 0;
		}
	/* both lists start out empty */
	j->relist[0][0].inst = 0;
	j->relist[1][0].inst = 0;

	/* Execute machine once for each character, including terminal NUL */
	s = j->rstarts;
	do{

		/* fast check for first char */
		/*
		 * NOTE(review): checkstart is derived from j->startchar only,
		 * so the BOL case below is reached only when startchar is
		 * nonzero -- confirm against the caller's setup.
		 */
		if(checkstart) {
			switch(j->starttype) {
			case RUNE:
				/* skip to the next occurrence of the first literal */
				while(*s != j->startchar) {
					if(*s == 0)
						return match;
					s++;
				}
				break;
			case BOL:
				if(s == bol)
					break;
				/* skip to the next newline */
				while(*s != '\n') {
					if(*s == 0)
						return match;
					s++;
				}
				break;
			}
		}

		r = *s;

		/* switch run lists */
		tl = j->relist[flag];
		tle = j->reliste[flag];
		nl = j->relist[flag^=1];
		nle = j->reliste[flag];
		nl->inst = 0;

		/* Add first instruction to current list */
		_rrenewemptythread(tl, progp->startinst, s);

		/* Execute machine until current list is empty */
		for(tlp=tl; tlp->inst; tlp++){
			/*
			 * Inside the switch, `continue' steps this thread to
			 * inst->u2.next without consuming a rune; `break'
			 * leaves the switch and the break after it ends the
			 * thread for this rune.
			 */
			for(inst=tlp->inst; ; inst = inst->u2.next){
				switch(inst->type){
				case RUNE:	/* regular character */
					if(inst->u1.r == r)
						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
							return -1;
					break;
				case LBRA:
					/* record start of subexpression subid */
					tlp->se.m[inst->u1.subid].s.rsp = s;
					continue;
				case RBRA:
					/* record end of subexpression subid */
					tlp->se.m[inst->u1.subid].e.rep = s;
					continue;
				case ANY:
					if(r != '\n')
						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
							return -1;
					break;
				case ANYNL:
					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
							return -1;
					break;
				case BOL:
					if(s == bol || *(s-1) == '\n')
						continue;
					break;
				case EOL:
					if(s == j->reol || r == 0 || r == '\n')
						continue;
					break;
				case CCLASS:
					/* spans holds [low, high] rune pairs up to end */
					ep = inst->u1.cp->end;
					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
						if(r >= rp[0] && r <= rp[1]){
							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
								return -1;
							break;
						}
					break;
				case NCCLASS:
					/* accept only if r falls in none of the spans */
					ep = inst->u1.cp->end;
					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
						if(r >= rp[0] && r <= rp[1])
							break;
					if(rp == ep)
						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
							return -1;
					break;
				case OR:
					/* evaluate right choice later */
					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
						return -1;
					/* efficiency: advance and re-evaluate */
					continue;
				case END:	/* Match! */
					match = 1;
					tlp->se.m[0].e.rep = s;
					/* presumably copies this thread's match into mp -- see _renewmatch */
					if(mp != 0)
						_renewmatch(mp, ms, &tlp->se);
					break;
				}
				break;
			}
		}
		/* stop at the caller-supplied end position, if there is one */
		if(s == j->reol)
			break;
		/* the fast skip is only safe when no thread is pending */
		checkstart = j->startchar && nl->inst==0;
		s++;
	}while(r);
	return match;
}
|
||||
|
||||
static int
|
||||
rregexec2(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
|
||||
|
||||
/* mark space */
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j->reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
return rregexec1(progp, bol, mp, ms, j);
|
||||
}
|
||||
|
||||
extern int
|
||||
rregexec(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements at mp */
|
||||
{
|
||||
Reljunk j;
|
||||
Relist relist0[LISTSIZE], relist1[LISTSIZE];
|
||||
int rv;
|
||||
|
||||
/*
|
||||
* use user-specified starting/ending location if specified
|
||||
*/
|
||||
j.rstarts = bol;
|
||||
j.reol = 0;
|
||||
if(mp && ms>0){
|
||||
if(mp->s.sp)
|
||||
j.rstarts = mp->s.rsp;
|
||||
if(mp->e.ep)
|
||||
j.reol = mp->e.rep;
|
||||
}
|
||||
j.starttype = 0;
|
||||
j.startchar = 0;
|
||||
if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
|
||||
j.starttype = RUNE;
|
||||
j.startchar = progp->startinst->u1.r;
|
||||
}
|
||||
if(progp->startinst->type == BOL)
|
||||
j.starttype = BOL;
|
||||
|
||||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = rregexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
rv = rregexec2(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
return rv;
|
||||
return -1;
|
||||
}
|
||||
55
src/libregexp/rregsub.c
Normal file
55
src/libregexp/rregsub.c
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#include "lib9.h"
|
||||
#include "regexp9.h"
|
||||
|
||||
/* substitute into one string using the matches from the last regexec() */
|
||||
extern void
|
||||
rregsub(Rune *sp, /* source string */
|
||||
Rune *dp, /* destination string */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms) /* number of elements pointed to by mp */
|
||||
{
|
||||
Rune *ssp;
|
||||
int i;
|
||||
|
||||
while(*sp != '\0'){
|
||||
if(*sp == '\\'){
|
||||
switch(*++sp){
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
i = *sp-'0';
|
||||
if(mp[i].s.rsp != 0 && mp!=0 && ms>i)
|
||||
for(ssp = mp[i].s.rsp;
|
||||
ssp < mp[i].e.rep;
|
||||
ssp++)
|
||||
*dp++ = *ssp;
|
||||
break;
|
||||
case '\\':
|
||||
*dp++ = '\\';
|
||||
break;
|
||||
case '\0':
|
||||
sp--;
|
||||
break;
|
||||
default:
|
||||
*dp++ = *sp;
|
||||
break;
|
||||
}
|
||||
}else if(*sp == '&'){
|
||||
if(mp[0].s.rsp != 0 && mp!=0 && ms>0)
|
||||
if(mp[0].s.rsp != 0)
|
||||
for(ssp = mp[0].s.rsp;
|
||||
ssp < mp[0].e.rep; ssp++)
|
||||
*dp++ = *ssp;
|
||||
}else
|
||||
*dp++ = *sp;
|
||||
sp++;
|
||||
}
|
||||
*dp = '\0';
|
||||
}
|
||||
46
src/libregexp/test.c
Normal file
46
src/libregexp/test.c
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
#include "lib9.h"
|
||||
#include <regexp9.h>
|
||||
|
||||
/* one test case: a pattern, a substitution template, and the compiled pattern */
struct x
{
	char *re;	/* regular expression source, handed to regcomp() */
	char *s;	/* substitution template, handed to regsub() */
	Reprog *p;	/* compiled form of re; filled in by main() */
};
|
||||
|
||||
/*
 * Test cases, tried in order against the command-line argument.
 * The p field of each entry starts out 0 and is set by main().
 * The final all-zero entry ends the table (main() stops at re == 0).
 */
struct x t[] = {
	{ "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 },
	{ "^local!(.*)$", "/mail/box/\\1/mbox", 0 },
	{ "^plan9!(.*)$", "\\1", 0 },
	{ "^helix!(.*)$", "\\1", 0 },
	{ "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 },
	{ "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 },
	{ "^[^!]*\\.[^!]*!.*$", "inet!&", 0 },
	/* the bytes E2 98 BA are the UTF-8 encoding of U+263A */
	{ "^\xE2\x98\xBA$", "smiley", 0 },
	{ "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 },
	{ "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 },
	{ 0, 0, 0 },
};
|
||||
|
||||
main(int ac, char **av)
|
||||
{
|
||||
Resub rs[10];
|
||||
char dst[128];
|
||||
int n;
|
||||
struct x *tp;
|
||||
|
||||
for(tp = t; tp->re; tp++)
|
||||
tp->p = regcomp(tp->re);
|
||||
|
||||
|
||||
for(tp = t; tp->re; tp++){
|
||||
print("%s VIA %s", av[1], tp->re);
|
||||
memset(rs, 0, sizeof rs);
|
||||
if(regexec(tp->p, av[1], rs, 10)){
|
||||
regsub(tp->s, dst, sizeof dst, rs, 10);
|
||||
print(" sub %s -> %s", tp->s, dst);
|
||||
}
|
||||
print("\n");
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
20
src/libregexp/test2.c
Normal file
20
src/libregexp/test2.c
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#include "lib9.h"
|
||||
#include <regexp9.h>
|
||||
|
||||
|
||||
main(int ac, char **av)
|
||||
{
|
||||
Resub rs[10];
|
||||
Reprog *p;
|
||||
char *s;
|
||||
int i;
|
||||
|
||||
p = regcomp("[^a-z]");
|
||||
s = "\n";
|
||||
if(regexec(p, s, rs, 10))
|
||||
print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep);
|
||||
s = "0";
|
||||
if(regexec(p, s, rs, 10))
|
||||
print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep);
|
||||
exit(0);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue