summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2004-10-20 21:42:31 +0000
committerPaolo Bonzini <bonzini@gnu.org>2004-10-20 21:42:31 +0000
commit9c9919efe2166efd32409054005619062624226c (patch)
tree29cf0853d8049b5d73337285c437fd35eb96086e
downloadsed-9c9919efe2166efd32409054005619062624226c.tar.gz
initial import
(automatically generated log message) git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--base-0
-rw-r--r--ABOUT-NLS324
-rw-r--r--AUTHORS5
-rw-r--r--BUGS103
-rw-r--r--COPYING340
-rw-r--r--COPYING.DOC355
-rw-r--r--ChangeLog2615
-rw-r--r--INSTALL229
-rw-r--r--Makefile.am30
-rw-r--r--NEWS426
-rw-r--r--README13
-rw-r--r--README-alpha8
-rw-r--r--README.boot23
-rw-r--r--THANKS46
-rw-r--r--basicdefs.h202
-rwxr-xr-xbootstrap.sh82
-rwxr-xr-xbootstrap.sh.in82
-rw-r--r--config/getline.m441
-rw-r--r--config/gettext-ver.m41
-rwxr-xr-xconfig/help2man559
-rw-r--r--config/stdbool.m466
-rw-r--r--config/strverscmp.m424
-rwxr-xr-xconfig/texi2dvi660
-rw-r--r--configure.ac139
-rw-r--r--doc/Makefile.am52
-rw-r--r--doc/config.texi9
-rwxr-xr-xdoc/groupify.sed59
-rw-r--r--doc/sed-in.texi4026
-rw-r--r--doc/sed.1374
-rw-r--r--doc/sed.texi4193
-rw-r--r--doc/sed.x313
-rw-r--r--lib/Makefile.am16
-rw-r--r--lib/alloca.c504
-rw-r--r--lib/getline.c110
-rw-r--r--lib/getopt.c1049
-rw-r--r--lib/getopt.h133
-rw-r--r--lib/getopt1.c190
-rw-r--r--lib/memchr.c200
-rw-r--r--lib/memcmp.c396
-rw-r--r--lib/memmove.c76
-rw-r--r--lib/mkstemp.c70
-rw-r--r--lib/obstack.c569
-rw-r--r--lib/obstack.h605
-rw-r--r--lib/regcomp.c3793
-rw-r--r--lib/regex.c97
-rw-r--r--lib/regex_.h588
-rw-r--r--lib/regex_internal.c1653
-rw-r--r--lib/regex_internal.h807
-rw-r--r--lib/regexec.c4269
-rw-r--r--lib/stdbool_.h47
-rw-r--r--lib/strerror.c52
-rw-r--r--lib/strverscmp.c132
-rw-r--r--lib/strverscmp.h20
-rw-r--r--lib/utils.c520
-rw-r--r--lib/utils.h48
-rw-r--r--po/ChangeLog45
-rw-r--r--po/POTFILES.in8
-rw-r--r--po/af.po433
-rw-r--r--po/ca.po446
-rw-r--r--po/cs.po425
-rw-r--r--po/da.po430
-rw-r--r--po/de.po435
-rw-r--r--po/el.po435
-rw-r--r--po/eo.po430
-rw-r--r--po/es.po433
-rw-r--r--po/et.po436
-rw-r--r--po/fi.po433
-rw-r--r--po/fr.po453
-rw-r--r--po/ga.po446
-rw-r--r--po/gl.po433
-rw-r--r--po/he.po423
-rw-r--r--po/hr.po431
-rw-r--r--po/hu.po431
-rw-r--r--po/id.po431
-rw-r--r--po/it.po508
-rw-r--r--po/ja.po444
-rw-r--r--po/ko.po423
-rw-r--r--po/nl.po435
-rw-r--r--po/pl.po445
-rw-r--r--po/pt_BR.po433
-rw-r--r--po/ro.po435
-rw-r--r--po/ru.po442
-rw-r--r--po/sed.pot400
-rw-r--r--po/sk.po435
-rw-r--r--po/sl.po435
-rw-r--r--po/sr.po432
-rw-r--r--po/sv.po446
-rw-r--r--po/tr.po439
-rw-r--r--po/zh_CN.po419
-rw-r--r--sed/Makefile.am18
-rw-r--r--sed/compile.c1721
-rw-r--r--sed/execute.c1747
-rw-r--r--sed/fmt.c587
-rw-r--r--sed/mbcs.c56
-rw-r--r--sed/regexp.c238
-rw-r--r--sed/sed.c308
-rw-r--r--sed/sed.h254
-rw-r--r--testsuite/0range.good1
-rw-r--r--testsuite/0range.inp6
-rw-r--r--testsuite/0range.sed1
-rw-r--r--testsuite/8bit.good9
-rw-r--r--testsuite/8bit.inp9
-rw-r--r--testsuite/8bit.sed21
-rw-r--r--testsuite/8to7.good14
-rw-r--r--testsuite/8to7.inp9
-rw-r--r--testsuite/8to7.sed1
-rw-r--r--testsuite/BOOST.tests829
-rw-r--r--testsuite/Makefile.am89
-rw-r--r--testsuite/Makefile.tests154
-rw-r--r--testsuite/PCRE.tests2367
-rw-r--r--testsuite/SPENCER.tests528
-rw-r--r--testsuite/allsub.good1
-rw-r--r--testsuite/allsub.inp1
-rw-r--r--testsuite/allsub.sed1
-rw-r--r--testsuite/binary.good8
-rw-r--r--testsuite/binary.inp4
-rw-r--r--testsuite/binary.sed189
-rw-r--r--testsuite/binary2.sed226
-rw-r--r--testsuite/binary3.sed204
-rw-r--r--testsuite/bkslashes.good2
-rw-r--r--testsuite/bkslashes.inp1
-rw-r--r--testsuite/bkslashes.sed3
-rw-r--r--testsuite/bsd.good1737
-rwxr-xr-xtestsuite/bsd.sh434
-rw-r--r--testsuite/bug-regex10.c61
-rw-r--r--testsuite/bug-regex11.c135
-rw-r--r--testsuite/bug-regex12.c73
-rw-r--r--testsuite/bug-regex13.c103
-rw-r--r--testsuite/bug-regex14.c54
-rw-r--r--testsuite/bug-regex15.c47
-rw-r--r--testsuite/bug-regex16.c35
-rw-r--r--testsuite/bug-regex21.c45
-rw-r--r--testsuite/bug-regex7.c92
-rw-r--r--testsuite/bug-regex8.c84
-rw-r--r--testsuite/bug-regex9.c67
-rw-r--r--testsuite/classes.good4
-rw-r--r--testsuite/classes.inp6
-rw-r--r--testsuite/classes.sed2
-rw-r--r--testsuite/cv-vars.good4
-rw-r--r--testsuite/cv-vars.inp6
-rw-r--r--testsuite/cv-vars.sed2
-rw-r--r--testsuite/dc.good3
-rw-r--r--testsuite/dc.inp14
-rw-r--r--testsuite/dc.sed322
-rw-r--r--testsuite/distrib.good29
-rw-r--r--testsuite/distrib.inp28
-rw-r--r--testsuite/distrib.sed56
-rw-r--r--testsuite/distrib.sh63
-rw-r--r--testsuite/dollar.good4
-rw-r--r--testsuite/dollar.inp4
-rw-r--r--testsuite/dollar.sed1
-rw-r--r--testsuite/empty.good2
-rw-r--r--testsuite/empty.inp2
-rw-r--r--testsuite/empty.sed1
-rw-r--r--testsuite/enable.good3
-rw-r--r--testsuite/enable.inp3
-rw-r--r--testsuite/enable.sed2
-rw-r--r--testsuite/eval.good40
-rw-r--r--testsuite/eval.in25
-rw-r--r--testsuite/eval.inp5
-rw-r--r--testsuite/eval.sed46
-rw-r--r--testsuite/factor.good15
-rw-r--r--testsuite/factor.inp8
-rw-r--r--testsuite/factor.sed76
-rw-r--r--testsuite/fasts.good14
-rw-r--r--testsuite/fasts.inp1
-rw-r--r--testsuite/fasts.sed46
-rw-r--r--testsuite/flipcase.good25
-rw-r--r--testsuite/flipcase.inp25
-rw-r--r--testsuite/flipcase.sed1
-rw-r--r--testsuite/head.good3
-rw-r--r--testsuite/head.inp9
-rw-r--r--testsuite/head.sed1
-rw-r--r--testsuite/inclib.good34
-rw-r--r--testsuite/inclib.inp34
-rw-r--r--testsuite/inclib.sed2
-rw-r--r--testsuite/insens.good2
-rw-r--r--testsuite/insens.inp1
-rw-r--r--testsuite/insens.sed4
-rw-r--r--testsuite/khadafy.good32
-rw-r--r--testsuite/khadafy.inp32
-rw-r--r--testsuite/khadafy.sed2
-rw-r--r--testsuite/linecnt.good110
-rw-r--r--testsuite/linecnt.inp55
-rw-r--r--testsuite/linecnt.sed1
-rw-r--r--testsuite/mac-mf.good200
-rw-r--r--testsuite/mac-mf.inp200
-rw-r--r--testsuite/mac-mf.sed154
-rw-r--r--testsuite/madding.good1
-rw-r--r--testsuite/madding.inp1
-rw-r--r--testsuite/madding.sed8
-rw-r--r--testsuite/manis.good22
-rw-r--r--testsuite/manis.inp22
-rw-r--r--testsuite/manis.sed6
-rw-r--r--testsuite/middle.good3
-rw-r--r--testsuite/middle.inp9
-rw-r--r--testsuite/middle.sed1
-rw-r--r--testsuite/newjis.good4
-rw-r--r--testsuite/newjis.inp4
-rw-r--r--testsuite/newjis.sed1
-rw-r--r--testsuite/noeol.good3
-rw-r--r--testsuite/noeol.inp3
-rw-r--r--testsuite/noeol.sed1
-rw-r--r--testsuite/noeolw.1good7
-rw-r--r--testsuite/noeolw.2good3
-rw-r--r--testsuite/noeolw.good12
-rw-r--r--testsuite/noeolw.sed10
-rw-r--r--testsuite/numsub.good1
-rw-r--r--testsuite/numsub.inp2
-rw-r--r--testsuite/numsub.sed7
-rw-r--r--testsuite/numsub2.good0
-rw-r--r--testsuite/numsub2.inp1
-rw-r--r--testsuite/numsub2.sed1
-rw-r--r--testsuite/numsub3.good0
-rw-r--r--testsuite/numsub3.inp1
-rw-r--r--testsuite/numsub3.sed1
-rw-r--r--testsuite/numsub4.good0
-rw-r--r--testsuite/numsub4.inp1
-rw-r--r--testsuite/numsub4.sed1
-rw-r--r--testsuite/numsub5.good0
-rw-r--r--testsuite/numsub5.inp1
-rw-r--r--testsuite/numsub5.sed1
-rw-r--r--testsuite/ptestcases.h326
-rw-r--r--testsuite/readin.good21
-rw-r--r--testsuite/readin.in21
-rw-r--r--testsuite/readin.inp14
-rw-r--r--testsuite/readin.sed1
-rw-r--r--testsuite/recall.good7
-rw-r--r--testsuite/recall.inp1
-rw-r--r--testsuite/recall.sed7
-rw-r--r--testsuite/runptests.c123
-rwxr-xr-xtestsuite/runtest18
-rw-r--r--testsuite/runtests.c138
-rw-r--r--testsuite/sep.good3
-rw-r--r--testsuite/sep.inp3
-rw-r--r--testsuite/sep.sed4
-rw-r--r--testsuite/space.good2
-rw-r--r--testsuite/space.inp2
-rw-r--r--testsuite/space.sed1
-rw-r--r--testsuite/subwrite.inp4
-rw-r--r--testsuite/subwrite.sed1
-rw-r--r--testsuite/subwrt1.good4
-rw-r--r--testsuite/subwrt2.good2
-rw-r--r--testsuite/testcases.h167
-rw-r--r--testsuite/tst-boost.c227
-rw-r--r--testsuite/tst-pcre.c241
-rwxr-xr-xtestsuite/tst-regexbin0 -> 151376 bytes
-rw-r--r--testsuite/tst-regex.c265
-rw-r--r--testsuite/tst-rxspencer.c551
-rw-r--r--testsuite/uniq.good874
-rw-r--r--testsuite/uniq.inp2058
-rw-r--r--testsuite/uniq.sed20
-rw-r--r--testsuite/version.gin5
-rw-r--r--testsuite/writeout.inp4
-rw-r--r--testsuite/writeout.sed1
-rw-r--r--testsuite/wrtout1.good4
-rw-r--r--testsuite/wrtout2.good2
-rw-r--r--testsuite/xabcx.good4
-rw-r--r--testsuite/xabcx.inp4
-rw-r--r--testsuite/xabcx.sed2
-rw-r--r--testsuite/xbxcx.good7
-rw-r--r--testsuite/xbxcx.inp7
-rw-r--r--testsuite/xbxcx.sed2
-rw-r--r--testsuite/xbxcx3.good7
-rw-r--r--testsuite/xbxcx3.inp7
-rw-r--r--testsuite/xbxcx3.sed1
-rw-r--r--testsuite/xemacs.good67
-rw-r--r--testsuite/xemacs.inp67
-rw-r--r--testsuite/xemacs.sed16
268 files changed, 65101 insertions, 0 deletions
diff --git a/ABOUT-NLS b/ABOUT-NLS
new file mode 100644
index 0000000..5fde45a
--- /dev/null
+++ b/ABOUT-NLS
@@ -0,0 +1,324 @@
+Notes on the Free Translation Project
+*************************************
+
+ Free software is going international! The Free Translation Project
+is a way to get maintainers of free software, translators, and users all
+together, so that free software will gradually become able to speak many
+languages. A few packages already provide translations for their messages.
+
+ If you found this `ABOUT-NLS' file inside a distribution, you may
+assume that the distributed package does use GNU `gettext' internally,
+itself available at your nearest GNU archive site. But you do _not_
+need to install GNU `gettext' prior to configuring, installing or using
+this package with messages translated.
+
+ Installers will find here some useful hints. These notes also
+explain how users should proceed for getting the programs to use the
+available translations. They tell how people wanting to contribute and
+work at translations should contact the appropriate team.
+
+ When reporting bugs in the `intl/' directory or bugs which may be
+related to internationalization, you should tell about the version of
+`gettext' which is used. The information can be found in the
+`intl/VERSION' file, in internationalized packages.
+
+Quick configuration advice
+==========================
+
+ If you want to exploit the full power of internationalization, you
+should configure it using
+
+ ./configure --with-included-gettext
+
+to force usage of internationalizing routines provided within this
+package, despite the existence of internationalizing capabilities in the
+operating system where this package is being installed. So far, only
+the `gettext' implementation in the GNU C library version 2 provides as
+many features (such as locale alias, message inheritance, automatic
+charset conversion or plural form handling) as the implementation here.
+It is also not possible to offer this additional functionality on top
+of a `catgets' implementation. Future versions of GNU `gettext' will
+very likely convey even more functionality. So it might be a good idea
+to change to GNU `gettext' as soon as possible.
+
+ So you need _not_ provide this option if you are using GNU libc 2 or
+you have installed a recent copy of the GNU gettext package with the
+included `libintl'.
+
+INSTALL Matters
+===============
+
+ Some packages are "localizable" when properly installed; the
+programs they contain can be made to speak your own native language.
+Most such packages use GNU `gettext'. Other packages have their own
+approaches to internationalization, predating GNU `gettext'.
+
+ By default, this package will be installed to allow translation of
+messages. It will automatically detect whether the system already
+provides the GNU `gettext' functions. If not, the GNU `gettext' own
+library will be used. This library is wholly contained within this
+package, usually in the `intl/' subdirectory, so prior installation of
+the GNU `gettext' package is _not_ required. Installers may use
+special options at configuration time for changing the default
+behaviour. The commands:
+
+ ./configure --with-included-gettext
+ ./configure --disable-nls
+
+will respectively bypass any pre-existing `gettext' to use the
+internationalizing routines provided within this package, or else,
+_totally_ disable translation of messages.
+
+ When you already have GNU `gettext' installed on your system and run
+configure without an option for your new package, `configure' will
+probably detect the previously built and installed `libintl.a' file and
+will decide to use this. This might not be what is desirable. You
+should use the more recent version of the GNU `gettext' library. I.e.
+if the file `intl/VERSION' shows that the library which comes with this
+package is more recent, you should use
+
+ ./configure --with-included-gettext
+
+to prevent auto-detection.
+
+ The configuration process will not test for the `catgets' function
+and therefore it will not be used. The reason is that even an
+emulation of `gettext' on top of `catgets' could not provide all the
+extensions of the GNU `gettext' library.
+
+ Internationalized packages have usually many `po/LL.po' files, where
+LL gives an ISO 639 two-letter code identifying the language. Unless
+translations have been forbidden at `configure' time by using the
+`--disable-nls' switch, all available translations are installed
+together with the package. However, the environment variable `LINGUAS'
+may be set, prior to configuration, to limit the installed set.
+`LINGUAS' should then contain a space separated list of two-letter
+codes, stating which languages are allowed.
+
+Using This Package
+==================
+
+ As a user, if your language has been installed for this package, you
+only have to set the `LANG' environment variable to the appropriate
+`LL_CC' combination. Here `LL' is an ISO 639 two-letter language code,
+and `CC' is an ISO 3166 two-letter country code. For example, let's
+suppose that you speak German and live in Germany. At the shell
+prompt, merely execute `setenv LANG de_DE' (in `csh'),
+`export LANG; LANG=de_DE' (in `sh') or `export LANG=de_DE' (in `bash').
+This can be done from your `.login' or `.profile' file, once and for
+all.
+
+ You might think that the country code specification is redundant.
+But in fact, some languages have dialects in different countries. For
+example, `de_AT' is used for Austria, and `pt_BR' for Brazil. The
+country code serves to distinguish the dialects.
+
+ Not all programs have translations for all languages. By default, an
+English message is shown in place of a nonexistent translation. If you
+understand other languages, you can set up a priority list of languages.
+This is done through a different environment variable, called
+`LANGUAGE'. GNU `gettext' gives preference to `LANGUAGE' over `LANG'
+for the purpose of message handling, but you still need to have `LANG'
+set to the primary language; this is required by other parts of the
+system libraries. For example, some Swedish users who would rather
+read translations in German than English for when Swedish is not
+available, set `LANGUAGE' to `sv:de' while leaving `LANG' to `sv_SE'.
+
+ In the `LANGUAGE' environment variable, but not in the `LANG'
+environment variable, `LL_CC' combinations can be abbreviated as `LL'
+to denote the language's main dialect. For example, `de' is equivalent
+to `de_DE' (German as spoken in Germany), and `pt' to `pt_PT'
+(Portuguese as spoken in Portugal) in this context.
+
+Translating Teams
+=================
+
+ For the Free Translation Project to be a success, we need interested
+people who like their own language and write it well, and who are also
+able to synergize with other translators speaking the same language.
+Each translation team has its own mailing list. The up-to-date list of
+teams can be found at the Free Translation Project's homepage,
+`http://www.iro.umontreal.ca/contrib/po/HTML/', in the "National teams"
+area.
+
+ If you'd like to volunteer to _work_ at translating messages, you
+should become a member of the translating team for your own language.
+The subscribing address is _not_ the same as the list itself; it has
+`-request' appended. For example, speakers of Swedish can send a
+message to `sv-request@li.org', having this message body:
+
+ subscribe
+
+ Keep in mind that team members are expected to participate
+_actively_ in translations, or at solving translational difficulties,
+rather than merely lurking around. If your team does not exist yet and
+you want to start one, or if you are unsure about what to do or how to
+get started, please write to `translation@iro.umontreal.ca' to reach the
+coordinator for all translator teams.
+
+ The English team is special. It works at improving and uniformizing
+the terminology in use. Proven linguistic skills are praised more than
+programming skills, here.
+
+Available Packages
+==================
+
+ Languages are not equally supported in all packages. The following
+matrix shows the current state of internationalization, as of September
+2001. The matrix shows, with regard to each package, for which languages
+PO files have been submitted to translation coordination, with a
+translation percentage of at least 50%.
+
+ Ready PO files bg cs da de el en eo es et fi fr gl he hr id it ja
+ +----------------------------------------------------+
+ a2ps | [] [] [] |
+ bash | [] [] [] [] |
+ bfd | |
+ binutils | [] |
+ bison | [] [] [] [] [] |
+ clisp | [] [] [] [] |
+ cpio | [] [] [] [] [] |
+ diffutils | [] [] [] [] [] [] [] |
+ enscript | [] [] |
+ error | [] [] |
+ fetchmail | |
+ fileutils | [] [] [] [] [] [] [] [] |
+ findutils | [] [] [] [] [] [] [] [] |
+ flex | [] [] [] |
+ freetype | |
+ gas | |
+ gawk | [] [] |
+ gcal | |
+ gcc | |
+ gettext | [] [] [] [] [] [] [] [] [] [] |
+ gnupg | [] [] [] [] [] [] [] |
+ gprof | |
+ grep | [] [] [] [] [] [] [] [] |
+ hello | [] [] [] [] [] [] [] [] [] [] [] |
+ id-utils | [] [] [] |
+ indent | [] [] [] [] [] |
+ jpilot | [] |
+ kbd | |
+ ld | [] |
+ libc | [] [] [] [] [] [] [] [] |
+ lilypond | [] |
+ lynx | [] [] [] [] |
+ m4 | [] [] [] [] [] [] [] [] |
+ make | [] [] [] [] [] [] |
+ mysecretdiary | [] |
+ nano | [] [] [] |
+ opcodes | |
+ parted | [] [] [] |
+ ptx | [] [] [] [] [] [] [] |
+ python | |
+ recode | [] [] [] [] [] [] [] [] [] |
+ sed | [] [] [] [] [] [] [] [] [] [] [] [] |
+ sh-utils | [] [] [] [] [] [] [] [] [] [] |
+ sharutils | [] [] [] [] [] [] [] [] |
+ sketch | |
+ soundtracker | [] [] [] |
+ sp | |
+ tar | [] [] [] [] [] [] [] [] |
+ texinfo | [] [] [] [] [] [] |
+ textutils | [] [] [] [] [] [] [] [] |
+ util-linux | [] [] |
+ wdiff | [] [] [] |
+ wget | [] [] [] [] [] [] [] [] [] [] |
+ +----------------------------------------------------+
+ bg cs da de el en eo es et fi fr gl he hr id it ja
+ 0 14 24 32 11 1 8 23 13 1 33 22 4 0 7 9 18
+
+ ko lv nb nl nn no pl pt pt_BR ru sk sl sv tr uk zh
+ +----------------------------------------------------+
+ a2ps | [] [] [] | 6
+ bash | | 4
+ bfd | | 0
+ binutils | | 1
+ bison | [] | 6
+ clisp | [] | 5
+ cpio | [] [] [] [] [] | 10
+ diffutils | [] [] [] [] | 11
+ enscript | [] [] [] | 5
+ error | [] [] | 4
+ fetchmail | | 0
+ fileutils | [] [] [] [] [] [] [] [] [] | 17
+ findutils | [] [] [] [] [] [] [] [] | 16
+ flex | [] [] [] | 6
+ freetype | | 0
+ gas | | 0
+ gawk | [] | 3
+ gcal | | 0
+ gcc | | 0
+ gettext | [] [] [] [] [] [] [] [] | 18
+ gnupg | [] [] [] | 10
+ gprof | | 0
+ grep | [] [] [] [] | 12
+ hello | [] [] [] [] [] [] [] [] [] [] [] | 22
+ id-utils | [] [] [] | 6
+ indent | [] [] [] [] [] [] [] | 12
+ jpilot | | 1
+ kbd | [] | 1
+ ld | | 1
+ libc | [] [] [] [] [] [] [] [] | 16
+ lilypond | [] [] | 3
+ lynx | [] [] [] [] | 8
+ m4 | [] [] [] [] | 12
+ make | [] [] [] [] [] [] | 12
+ mysecretdiary | | 1
+ nano | [] | 4
+ opcodes | [] | 1
+ parted | [] [] | 5
+ ptx | [] [] [] [] [] [] [] [] | 15
+ python | | 0
+ recode | [] [] [] [] | 13
+ sed | [] [] [] [] [] [] [] | 19
+ sh-utils | [] [] [] [] [] [] [] [] [] [] [] | 21
+ sharutils | [] [] [] | 11
+ sketch | | 0
+ soundtracker | | 3
+ sp | | 0
+ tar | [] [] [] [] [] [] [] | 15
+ texinfo | [] | 7
+ textutils | [] [] [] [] [] [] [] [] | 16
+ util-linux | [] [] | 4
+ wdiff | [] [] [] [] | 7
+ wget | [] [] [] [] [] [] [] | 17
+ +----------------------------------------------------+
+ 33 teams ko lv nb nl nn no pl pt pt_BR ru sk sl sv tr uk zh
+ 53 domains 9 1 6 20 0 6 17 1 13 25 10 11 23 21 2 2 387
+
+ Some counters in the preceding matrix are higher than the number of
+visible blocks would suggest. This is because a few extra PO files are
+used for implementing regional variants of languages, or language
+dialects.
+
+ For a PO file in the matrix above to be effective, the package to
+which it applies should also have been internationalized and
+distributed as such by its maintainer. There might be an observable
+lag between the mere existence of a PO file and its wide availability in a
+distribution.
+
+ If September 2001 seems to be old, you may fetch a more recent copy
+of this `ABOUT-NLS' file on most GNU archive sites. The most
+up-to-date matrix with full percentage details can be found at
+`http://www.iro.umontreal.ca/contrib/po/HTML/matrix.html'.
+
+Using `gettext' in new packages
+===============================
+
+ If you are writing a freely available program and want to
+internationalize it you are welcome to use GNU `gettext' in your
+package. Of course you have to respect the GNU Library General Public
+License which covers the use of the GNU `gettext' library. This means
+in particular that even non-free programs can use `libintl' as a shared
+library, whereas only free software can use `libintl' as a static
+library or use modified versions of `libintl'.
+
+ Once the sources are changed appropriately and the setup can handle
+the use of `gettext', the only things missing are the translations. The
+Free Translation Project is also available for packages which are not
+developed inside the GNU project. Therefore the information given above
+applies also for every other Free Software Project. Contact
+`translation@iro.umontreal.ca' to make the `.pot' files available to
+the translation teams.
+
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..4474df9
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,5 @@
+GNU Sed was first authored by Jay Fenlason (hack@gnu.org)
+and later modified by Tom Lord (lord@gnu.org).
+
+It is currently being maintained by Ken Pizzini (ken@gnu.org)
+and Paolo Bonzini (bonzini@gnu.org).
diff --git a/BUGS b/BUGS
new file mode 100644
index 0000000..023fa5d
--- /dev/null
+++ b/BUGS
@@ -0,0 +1,103 @@
+* ABOUT BUGS
+
+Before reporting a bug, please check the list of known bugs
+and the list of oft-reported non-bugs (below).
+
+Bugs and comments may be sent to bonzini@gnu.org; please
+include in the Subject: header the first line of the output of
+``sed --version''.
+
+Please do not send a bug report like this:
+
+ [while building frobme-1.3.4]
+ $ configure
+ sed: file sedscr line 1: Unknown option to 's'
+
+If sed doesn't configure your favorite package, take a few extra
+minutes to identify the specific problem and make a stand-alone test
+case.
+
+A stand-alone test case includes all the data necessary to perform the
+test, and the specific invocation of sed that causes the problem. The
+smaller a stand-alone test case is, the better. A test case should
+not involve something as far removed from sed as ``try to configure
+frobme-1.3.4''. Yes, that is in principle enough information to look
+for the bug, but that is not a very practical prospect.
+
+
+
+* NON-BUGS
+
+`N' command on the last line
+
+ Most versions of sed exit without printing anything when the `N'
+ command is issued on the last line of a file. GNU sed instead
+ prints pattern space before exiting unless of course the `-n'
+ command switch has been specified. More information on the reason
+ behind this choice can be found in the Info manual.
+
+
+regex syntax clashes (problems with backslashes)
+
+ sed uses the Posix basic regular expression syntax. According to
+ the standard, the meaning of some escape sequences is undefined in
+ this syntax; notable in the case of GNU sed are `\|', `\+', `\?',
+ `\`', `\'', `\<', `\>', `\b', `\B', `\w', and `\W'.
+
+ As in all GNU programs that use Posix basic regular expressions, sed
+ interprets these escape sequences as meta-characters. So, `x\+'
+ matches one or more occurrences of `x'. `abc\|def' matches either
+ `abc' or `def'.
+
+ This syntax may cause problems when running scripts written for other
+ seds. Some sed programs have been written with the assumption that
+ `\|' and `\+' match the literal characters `|' and `+'. Such scripts
+ must be modified by removing the spurious backslashes if they are to
+ be used with recent versions of sed (not only GNU sed).
+
+ On the other hand, some scripts use `s|abc\|def||g' to remove occurrences
+ of _either_ `abc' or `def'. While this worked until sed 4.0.x, newer
+ versions interpret this as removing the string `abc|def'. This is
+ again undefined behavior according to POSIX, but this interpretation
+ is arguably more robust: the older one, for example, required that
+ the regex matcher parsed `\/' as `/' in the common case of escaping
+ a slash, which is again undefined behavior; the new behavior avoids
+ this, and this is good because the regex matcher is only partially
+ under our control.
+
+ In addition, GNU sed supports several escape characters (some of
+ which are multi-character) to insert non-printable characters
+ in scripts (`\a', `\c', `\d', `\o', `\r', `\t', `\v', `\x'). These
+ can cause similar problems with scripts written for other seds.
+
+
+-i clobbers read-only files
+
+ In short, `sed d -i' will let one delete the contents of
+ a read-only file, and in general the `-i' option will let
+ one clobber protected files. This is not a bug, but rather a
+ consequence of how the Unix filesystem works.
+
+ The permissions on a file say what can happen to the data
+ in that file, while the permissions on a directory say what can
+ happen to the list of files in that directory. `sed -i'
+ will not ever open for writing a file that is already on disk,
+ rather, it will work on a temporary file that is finally renamed
+ to the original name: if you rename or delete files, you're actually
+ modifying the contents of the directory, so the operation depends on
+ the permissions of the directory, not of the file. For this same
+ reason, sed will not let one use `-i' on a writeable file in a
+ read-only directory (but unbelievably nobody reports that as a
+ bug...).
+
+
+`0a' does not work (gives an error)
+
+ There is no line 0. 0 is a special address that is only used to treat
+ addresses like `0,/RE/' as active when the script starts: if you
+ write `1,/abc/d' and the first line includes the word `abc', then
+ that match would be ignored because address ranges must span at least
+ two lines (barring the end of the file); but what you probably wanted is
+ to delete every line up to the first one including `abc', and this
+ is obtained with `0,/abc/d'.
+
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..d60c31a
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/COPYING.DOC b/COPYING.DOC
new file mode 100644
index 0000000..b42936b
--- /dev/null
+++ b/COPYING.DOC
@@ -0,0 +1,355 @@
+ GNU Free Documentation License
+ Version 1.1, March 2000
+
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+0. PREAMBLE
+
+The purpose of this License is to make a manual, textbook, or other
+written document "free" in the sense of freedom: to assure everyone
+the effective freedom to copy and redistribute it, with or without
+modifying it, either commercially or noncommercially. Secondarily,
+this License preserves for the author and publisher a way to get
+credit for their work, while not being considered responsible for
+modifications made by others.
+
+This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft
+license designed for free software.
+
+We have designed this License in order to use it for manuals for free
+software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals;
+it can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.
+
+
+1. APPLICABILITY AND DEFINITIONS
+
+This License applies to any manual or other work that contains a
+notice placed by the copyright holder saying it can be distributed
+under the terms of this License. The "Document", below, refers to any
+such manual or work. Any member of the public is a licensee, and is
+addressed as "you".
+
+A "Modified Version" of the Document means any work containing the
+Document or a portion of it, either copied verbatim, or with
+modifications and/or translated into another language.
+
+A "Secondary Section" is a named appendix or a front-matter section of
+the Document that deals exclusively with the relationship of the
+publishers or authors of the Document to the Document's overall subject
+(or to related matters) and contains nothing that could fall directly
+within that overall subject. (For example, if the Document is in part a
+textbook of mathematics, a Secondary Section may not explain any
+mathematics.) The relationship could be a matter of historical
+connection with the subject or with related matters, or of legal,
+commercial, philosophical, ethical or political position regarding
+them.
+
+The "Invariant Sections" are certain Secondary Sections whose titles
+are designated, as being those of Invariant Sections, in the notice
+that says that the Document is released under this License.
+
+The "Cover Texts" are certain short passages of text that are listed,
+as Front-Cover Texts or Back-Cover Texts, in the notice that says that
+the Document is released under this License.
+
+A "Transparent" copy of the Document means a machine-readable copy,
+represented in a format whose specification is available to the
+general public, whose contents can be viewed and edited directly and
+straightforwardly with generic text editors or (for images composed of
+pixels) generic paint programs or (for drawings) some widely available
+drawing editor, and that is suitable for input to text formatters or
+for automatic translation to a variety of formats suitable for input
+to text formatters. A copy made in an otherwise Transparent file
+format whose markup has been designed to thwart or discourage
+subsequent modification by readers is not Transparent. A copy that is
+not "Transparent" is called "Opaque".
+
+Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML
+or XML using a publicly available DTD, and standard-conforming simple
+HTML designed for human modification. Opaque formats include
+PostScript, PDF, proprietary formats that can be read and edited only
+by proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the
+machine-generated HTML produced by some word processors for output
+purposes only.
+
+The "Title Page" means, for a printed book, the title page itself,
+plus such following pages as are needed to hold, legibly, the material
+this License requires to appear in the title page. For works in
+formats which do not have any title page as such, "Title Page" means
+the text near the most prominent appearance of the work's title,
+preceding the beginning of the body of the text.
+
+
+2. VERBATIM COPYING
+
+You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies
+to the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further
+copying of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+
+You may also lend copies, under the same conditions stated above, and
+you may publicly display copies.
+
+
+3. COPYING IN QUANTITY
+
+If you publish printed copies of the Document numbering more than 100,
+and the Document's license notice requires Cover Texts, you must enclose
+the copies in covers that carry, clearly and legibly, all these Cover
+Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
+the back cover. Both covers must also clearly and legibly identify
+you as the publisher of these copies. The front cover must present
+the full title with all words of the title equally prominent and
+visible. You may add other material on the covers in addition.
+Copying with changes limited to the covers, as long as they preserve
+the title of the Document and satisfy these conditions, can be treated
+as verbatim copying in other respects.
+
+If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.
+
+If you publish or distribute Opaque copies of the Document numbering
+more than 100, you must either include a machine-readable Transparent
+copy along with each Opaque copy, or state in or with each Opaque copy
+a publicly-accessible computer-network location containing a complete
+Transparent copy of the Document, free of added material, which the
+general network-using public has access to download anonymously at no
+charge using public-standard network protocols. If you use the latter
+option, you must take reasonably prudent steps, when you begin
+distribution of Opaque copies in quantity, to ensure that this
+Transparent copy will remain thus accessible at the stated location
+until at least one year after the last time you distribute an Opaque
+copy (directly or through your agents or retailers) of that edition to
+the public.
+
+It is requested, but not required, that you contact the authors of the
+Document well before redistributing any large number of copies, to give
+them a chance to provide you with an updated version of the Document.
+
+
+4. MODIFICATIONS
+
+You may copy and distribute a Modified Version of the Document under
+the conditions of sections 2 and 3 above, provided that you release
+the Modified Version under precisely this License, with the Modified
+Version filling the role of the Document, thus licensing distribution
+and modification of the Modified Version to whoever possesses a copy
+of it. In addition, you must do these things in the Modified Version:
+
+A. Use in the Title Page (and on the covers, if any) a title distinct
+ from that of the Document, and from those of previous versions
+ (which should, if there were any, be listed in the History section
+ of the Document). You may use the same title as a previous version
+ if the original publisher of that version gives permission.
+B. List on the Title Page, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the Modified
+ Version, together with at least five of the principal authors of the
+ Document (all of its principal authors, if it has less than five).
+C. State on the Title page the name of the publisher of the
+ Modified Version, as the publisher.
+D. Preserve all the copyright notices of the Document.
+E. Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.
+F. Include, immediately after the copyright notices, a license notice
+ giving the public permission to use the Modified Version under the
+ terms of this License, in the form shown in the Addendum below.
+G. Preserve in that license notice the full lists of Invariant Sections
+ and required Cover Texts given in the Document's license notice.
+H. Include an unaltered copy of this License.
+I. Preserve the section entitled "History", and its title, and add to
+ it an item stating at least the title, year, new authors, and
+ publisher of the Modified Version as given on the Title Page. If
+ there is no section entitled "History" in the Document, create one
+ stating the title, year, authors, and publisher of the Document as
+ given on its Title Page, then add an item describing the Modified
+ Version as stated in the previous sentence.
+J. Preserve the network location, if any, given in the Document for
+ public access to a Transparent copy of the Document, and likewise
+ the network locations given in the Document for previous versions
+ it was based on. These may be placed in the "History" section.
+ You may omit a network location for a work that was published at
+ least four years before the Document itself, or if the original
+ publisher of the version it refers to gives permission.
+K. In any section entitled "Acknowledgements" or "Dedications",
+ preserve the section's title, and preserve in the section all the
+ substance and tone of each of the contributor acknowledgements
+ and/or dedications given therein.
+L. Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers
+ or the equivalent are not considered part of the section titles.
+M. Delete any section entitled "Endorsements". Such a section
+ may not be included in the Modified Version.
+N. Do not retitle any existing section as "Endorsements"
+ or to conflict in title with any Invariant Section.
+
+If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.
+
+You may add a section entitled "Endorsements", provided it contains
+nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.
+
+You may add a passage of up to five words as a Front-Cover Text, and a
+passage of up to 25 words as a Back-Cover Text, to the end of the list
+of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or
+through arrangements made by) any one entity. If the Document already
+includes a cover text for the same cover, previously added by you or
+by arrangement made by the same entity you are acting on behalf of,
+you may not add another; but you may replace the old one, on explicit
+permission from the previous publisher that added the old one.
+
+The author(s) and publisher(s) of the Document do not by this License
+give permission to use their names for publicity for or to assert or
+imply endorsement of any Modified Version.
+
+
+5. COMBINING DOCUMENTS
+
+You may combine the Document with other documents released under this
+License, under the terms defined in section 4 above for modified
+versions, provided that you include in the combination all of the
+Invariant Sections of all of the original documents, unmodified, and
+list them all as Invariant Sections of your combined work in its
+license notice.
+
+The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by
+adding at the end of it, in parentheses, the name of the original
+author or publisher of that section if known, or else a unique number.
+Make the same adjustment to the section titles in the list of
+Invariant Sections in the license notice of the combined work.
+
+In the combination, you must combine any sections entitled "History"
+in the various original documents, forming one section entitled
+"History"; likewise combine any sections entitled "Acknowledgements",
+and any sections entitled "Dedications". You must delete all sections
+entitled "Endorsements."
+
+
+6. COLLECTIONS OF DOCUMENTS
+
+You may make a collection consisting of the Document and other documents
+released under this License, and replace the individual copies of this
+License in the various documents with a single copy that is included in
+the collection, provided that you follow the rules of this License for
+verbatim copying of each of the documents in all other respects.
+
+You may extract a single document from such a collection, and distribute
+it individually under this License, provided you insert a copy of this
+License into the extracted document, and follow this License in all
+other respects regarding verbatim copying of that document.
+
+
+7. AGGREGATION WITH INDEPENDENT WORKS
+
+A compilation of the Document or its derivatives with other separate
+and independent documents or works, in or on a volume of a storage or
+distribution medium, does not as a whole count as a Modified Version
+of the Document, provided no compilation copyright is claimed for the
+compilation. Such a compilation is called an "aggregate", and this
+License does not apply to the other self-contained works thus compiled
+with the Document, on account of their being thus compiled, if they
+are not themselves derivative works of the Document.
+
+If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one quarter
+of the entire aggregate, the Document's Cover Texts may be placed on
+covers that surround only the Document within the aggregate.
+Otherwise they must appear on covers around the whole aggregate.
+
+
+8. TRANSLATION
+
+Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License provided that you also include the
+original English version of this License. In case of a disagreement
+between the translation and the original English version of this
+License, the original English version will prevail.
+
+
+9. TERMINATION
+
+You may not copy, modify, sublicense, or distribute the Document except
+as expressly provided for under this License. Any other attempt to
+copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this
+License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+
+10. FUTURE REVISIONS OF THIS LICENSE
+
+The Free Software Foundation may publish new, revised versions
+of the GNU Free Documentation License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns. See
+http://www.gnu.org/copyleft/.
+
+Each version of the License is given a distinguishing version number.
+If the Document specifies that a particular numbered version of this
+License "or any later version" applies to it, you have the option of
+following the terms and conditions either of that specified version or
+of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.
+
+
+ADDENDUM: How to use this License for your documents
+
+To use this License in a document you have written, include a copy of
+the License in the document and put the following copyright and
+license notices just after the title page:
+
+ Copyright (c) YEAR YOUR NAME.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.1
+ or any later version published by the Free Software Foundation;
+ with the Invariant Sections being LIST THEIR TITLES, with the
+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+ A copy of the license is included in the section entitled "GNU
+ Free Documentation License".
+
+If you have no Invariant Sections, write "with no Invariant Sections"
+instead of saying which ones are invariant. If you have no
+Front-Cover Texts, write "no Front-Cover Texts" instead of
+"Front-Cover Texts being LIST"; likewise for Back-Cover Texts.
+
+If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of
+free software license, such as the GNU General Public License,
+to permit their use in free software.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..f0f64d3
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,2615 @@
+2004-10-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/utils.c (utils_id_s): Renamed to open_files.
+ (struct id): Renamed to struct open_file.
+
+2004-10-08 Jakub Jelinek <jakub@redhat.com>
+
+ * testsuite/Makefile.tests (bug-regex*, run-tests,
+ run-ptests): Use $(SED).
+ (version): Likewise; prepend $(SED) invocation with $(SEDENV).
+
+2004-08-16 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.1.2 released.
+
+2004-08-06 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (bad_command): Fix off-by-one error.
+ (snarf_char_class): Fix problem with [.....[] (i.e.
+ last char in class is a bracket).
+
+2004-06-30 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.1.1 released.
+
+2004-06-29 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (mark_subst_opts): Return int.
+ * sed/execute.c (open_next_file): Fix uninitialized
+ variable.
+
+2004-06-10 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.1 released.
+
+2004-03-25 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/obstack.h: Get current version.
+
+2004-03-13 Paolo Bonzini <bonzini@gnu.org>
+
+ Exit as soon as possible on an I/O error, and with
+ a better error message.
+
+ * lib/utils.c (ck_mkstemp, ck_rename, ck_getline): New
+ functions. Save temporary files into utils_id_s.
+ (struct id): Add a field named temp.
+ (ck_fopen): Init the new temp field of struct id.
+ (panic): Unlink temporary files before exiting.
+ * sed/execute.c (read_file_line): Use ck_getline.
+ (closedown): Use ck_rename.
+ (open_next_file): Use ck_mkstemp.
+
+2004-01-20 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h (enum addr_state): New definition.
+ (enum addr_type): Remove ADDR_IS_NUM2.
+ (struct sed_cmd): Replace a1_matched with range_state.
+ * sed/compile.c (next_cmd_entry): Use range_state.
+ (compile_program): Death to ADDR_IS_NUM2. Compile
+ N,Mp as Np if N>=M.
+ * sed/execute.c (match_address_p): Rewritten. Handle
+ ADDR_IS_NUM here.
+ (match_an_address_p): Adapt to new match_address_p.
+ (execute_program): Adjust to use range_state in `c'.
+ Handle addr_bang here.
+ (reset_addresses): Use range_state.
+
+ (struct input): New field "reset_at_next_file".
+ (read_pattern_space): Use it instead of "separate_files".
+ (process_files): Initialize it.
+
+2004-01-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h: Do not include wchar.h and wctype.h, and do
+ not include the alloca stuff.
+ * basicdefs.h: Move all that here.
+
+2004-01-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/regexp.c [REG_PERL]: Use REG_STARTEND instead of regexec2.
+
+2004-01-09 Paul Eggert <eggert@twinsun.com>
+ Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h (posixicity): New variable, replaces POSIXLY_CORRECT.
+ * sed/sed.c (main): Set it.
+ * sed/compile.c: Use it instead of POSIXLY_CORRECT.
+ * sed/execute.c: Use it instead of POSIXLY_CORRECT.
+ * doc/sed-in.texi: Document it and --posix.
+
+2004-01-05 Paul Eggert <eggert@twinsun.com>
+ Paolo Bonzini <bonzini@gnu.org>
+
+ * NEWS: Fix [\n] to match either backslash or n in POSIXLY_CORRECT mode.
+ * doc/sed-in.texi: Document this. Also, document regular expressions
+ a bit better overall, using terminology that's more similar to POSIX.
+ * sed/sed.h (enum text_types): New definition.
+ * sed/compile.c (normalize_text): Replace final parameter with one of
+ type enum text_types. If TEXT_REGEX and in POSIXLY_CORRECT mode,
+ grok character classes without replacing \n inside them.
+
+2004-01-03 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (execute_program): print final line
+ after executing N, if not POSIXLY_CORRECT.
+
+2003-12-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c: fix "\\\n" in RHS of s command.
+ Reported by Mike Castle.
+ * testsuite/bkslashes.inp, testsuite/bkslashes.good,
+ testsuite/bkslashes.sed: New files.
+ * testsuite/Makefile.am, testsuite/Makefile.tests: Add
+ the bkslashes test.
+
+2003-12-16 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0b released.
+
+ * sed/mbcs.c: New file.
+ * sed/sed.h: Declare macros for mbcs.c.
+ * sed/compile.c: Use them.
+ (brlen): Moved to mbcs.c.
+ * sed/execute.c: Use them.
+ * sed/sed.c: call initialize_mbcs ().
+
+2003-12-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/regex.c (match_regex): fix memory leak.
+
+2003-11-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (reset_addresses): leave addresses 0
+ and 0~STEP enabled.
+
+2003-11-15 Jakub Jelinek <jakub@redhat.com>
+
+ * sed/regex.c: Use fastmap.
+
+2003-09-21 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0a released.
+
+ * sed/execute.c (struct line): Add mbstate field.
+ (str_append): Keep mbstate up to date.
+ (str_append_modified): Likewise, and use towupper/towlower.
+ (line_init): Initialize mbstate.
+ (line_copy): Copy mbstate.
+ (line_append): Copy mbstate.
+
+2003-07-15 Stepan Kasal <kasal@ucw.cz>
+ Paolo Bonzini <bonzini@gnu.org>
+
+ Change the way we treat lines which are not terminated by a newline.
+ Such lines are printed without the terminating newline (as before)
+ but as soon as more text is sent to the same output stream, the
+ missing newline is printed, so that the two lines don't concatenate.
+
+ * sed/execute.c (output_file): Is now struct output; users adjusted
+ to access the fp field, call output_missing_newline before, and
+ call flush_output afterwards.
+ (read_file_line): Set line.chomped FALSE each time we encounter a
+ line without the newline terminator, no matter whether this is the
+ last input file or not, and no matter whether we are in
+ POSIXLY_CORRECT mode or not.
+ (output_missing_newline): New function which prints the suppressed
+ newline, if necessary.
+ (flush_output): New function for a common pattern.
+ (output_line): Use struct output, set its flag accordingly.
+ (dump_append_queue): Use `ck_fwrite' instead of output_line.
+ (do_list): Flush the output stream at the end.
+ (closedown): The code ``if(separate_files) rewind_read_files();''
+ (read_pattern_space): ... has been moved here.
+ (process_files): Don't do the default `p' at the end, ...
+ (execute_program): ... as this function is now responsible for it;
+ add the code to the end of the function and to the command `q';
+ the commands `d', `D' and `Q' thus no longer have to forge an empty
+ line.
+ (execute_program): Commands `c' and `i' no longer call the
+ function output_line with chomped==FALSE; instead, they chomp
+ the text and call the function with chomped==TRUE.
+ (execute_program): Command `e' no longer uses output_line; it
+ calls ck_fwrite directly. Commands `e', `L' and `=' flush
+ the output stream at the end.
+ * sed/compile.c (special_files): Use `struct output' instead of the
+ file name.
+ (get_openfile): ... special files are no longer copied to file_read
+ or file_write.
+ (fp_list): Move to sed.h (users adjusted) and rename as...
+ * sed/sed.h (struct output): ...this. New flag missing_newline
+ associated to the output stream.
+ (struct sed_cmd, struct subst): Use `struct output *' instead of mere
+ `FILE *'; adjust compile.c and execute.c.
+ * testsuite/noeolw.sed, testsuite/noeolw.good, testsuite/noeolw.1good,
+ testsuite/noeolw.2good: New tests
+
+2003-07-15 Stepan Kasal <kasal@ucw.cz>
+
+ * lib/utils.h, sed/sed.h: #include "basicdefs.h",
+ don't include it from various *.c files.
+ * sed/regex.c: Don't include regex.h as it's included via sed.h.
+
+2003-06-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/getline.c: Don't realloc with first param = NULL.
+
+2003-05-07 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c: Make treatment of ADDR_IS_NUM_MOD
+ simpler, and fix bugs in 0~5,+1
+ * sed/compile.c: Complain about addresses like 0
+ and 0,3 which are sources of misunderstandings.
+ Reported by Akim Demaille <akim@epita.fr>
+
+2003-03-25 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.7 released
+
+ * sed/execute.c (append_replacement): Extract from
+ do_subst
+ (do_subst): Don't update count when a match was
+ skipped.
+ * testsuite/xbxcx3.good, testsuite/xbxcx3.sed,
+ testsuite/xbxcx3.inp: Regression tests
+
+2003-03-23 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (do_subst): Fix several bugs with
+ numbered matches
+ * testsuite/numsub2.good, testsuite/numsub2.inp,
+ testsuite/numsub2.sed, testsuite/numsub3.good,
+ testsuite/numsub3.inp, testsuite/numsub3.sed,
+ testsuite/numsub4.good, testsuite/numsub4.inp,
+ testsuite/numsub4.sed, testsuite/numsub5.good,
+ testsuite/numsub5.inp, testsuite/numsub5.sed:
+ regression tests for the bugs
+
+2003-03-15 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.6 released
+
+ * lib/mkstemp.c: Include sys/file.h if available for the
+ benefit of Ultrix
+
+2003-03-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c: Replace flagT with bool
+ * sed/execute.c: Replace flagT with bool
+ * sed/fmt.c: Replace flagT with bool
+ * sed/sed.c: Replace flagT with bool
+ * sed/regex.c: Replace flagT with bool
+
+2003-03-13 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (compile_program): Understand parameter
+ of `v'.
+
+ * sed/sed.c (usage): Split help message into multiple
+ strings
+ (main): Don't understand -h and -V
+
+2003-03-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (match_slash, snarf_char_class): More
+ multibyte character support
+ (brlen): New function
+ * testsuite/classes.good, testsuite/classes.inp,
+ testsuite/classes.sed: New files
+
+2003-03-10 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (match_slash): Strip the \ in front of
+ slashes (so that the matcher sees x/ for s/x\///). Don't
+ match / and [ unless at the start of a character.
+
+2003-02-18 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/regex.c (compile_regex): // matches the last regular
+ expression even in POSIXLY_CORRECT mode.
+ * sed/compile.c (normalize_text): Treat multibyte character
+ sets correctly
+ (read_text): Don't swallow backslash sequences, run text
+ through normalize_text
+ (compile_program): Ditto for y command
+
+ * sed/compile.c (normalize_text): Add parameter that says
+ whether the text will be processed further to remove more
+ backslash escapes. Callers adjusted
+ (match_slash): Remove same parameter from here. Callers adjusted.
+
+2003-02-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h: Fix prototype for match_regex, declare re_registers
+ if REG_PERL
+ * sed/execute.c (do_subst): Use re_registers
+ * sed/regex.c (copy_regs): New function
+ [REG_PERL]: Use re_registers
+ [!REG_PERL]: Avoid using internal entry points, support pre-glibc
+ 2.3 regex for the sake of --without-included-regex.
+
+2003-01-04 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h: Move some stuff from here...
+ * sed/basicdefs.h: ...to here
+ * lib/utils.c (ck_fopen): Add FAIL parameter
+ * lib/utils.h: Adjust parameter
+ * sed/compile.c, sed/execute.c, sed/sed.c: Adjust callers
+
+ * sed/basicdefs.h: Add TRUE/FALSE
+ * sed/compile.c, sed/execute.c, sed/sed.c: Use them
+ * sed/fmt.c: Do not redefine them
+
+2003-01-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.c: Bump copyright year
+
+2002-12-24 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.c: Use bindtextdomain
+ * sed/basicdefs.h [__EMX__]: Define initialize_main
+ * lib/getline.c [__EMX__]: Strip trailing CR
+
+ * sed/regex.c: Don't use N_ on the lines that define
+ error messages, some compilers complain.
+
+2002-12-18 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.5 released
+
+ * sed/compile.c: Don't use N_ on the lines that define
+ error messages, some compilers complain.
+
+2002-12-16 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c: Improvements to some error messages;
+ `a', `i', `l', `L', `r' accept two addresses except in
+ POSIXLY_CORRECT mode.
+
+2002-12-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/regex_internal.c: Fix problem on non-glibc
+ systems, from Jakub Jelinek
+ * lib/regex.c (RE_ENABLE_I18N): Conditionalize on
+ HAVE_MBRTOWC and HAVE_WCRTOMB.
+ * lib/getline.c: Fix compilation on non-glibc system
+ * lib/snprintf.c: Fix compilation on non-glibc system
+ * lib/basicdefs.h [P_]: Make more portable
+
+2002-12-12 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.4 released
+
+2002-11-21 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.3 released
+
+2002-11-19 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.2 released
+
+2002-11-05 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0.1 released
+
+2002-10-23 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 4.0 released
+
+2002-10-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/utils.c: Don't fail for EBADF in fflush
+ * src/sed.c: the_program is now a global
+
+2002-10-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/sed.c: Print GNU sed in --version for GNU sed,
+ and super-sed for super-sed (thanks to Bruno Haible)
+
+2002-10-17 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 3.96 released
+
+2002-10-16 Isamu Hasegawa <isamu@yamato.ibm.com>
+
+ * src/execute.c (execute_program): Multibyte 'y'
+ * src/compile.c (compile_program): Likewise
+ * src/sed.h: Likewise
+
+2002-10-08 Paolo Bonzini <bonzini@gnu.org>
+
+ *** Version 3.95 released
+
+2002-07-15 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/sed.h: rfile --> fname, wfile --> fp
+ * src/compile.c (compile_command): Parse 'R' like 'w', use
+ separate lists for file read and file write
+ * src/compile.c (get_openfile): New name of get_writefile
+ * src/compile.c (rewind_read_files): New function
+ * src/sed.h: Declared here
+ * src/execute.c (closedown): And called here
+ * src/execute.c (append_queue): Added 'free' field
+ * src/execute.c (execute_program): Implement 'R'
+
+2002-06-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/execute.c (do_subst): Replaced flag was set on every
+ regexp match, while the first matches should not set it
+ for s///N.
+
+2002-06-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/compile.c (compile_file): Open the script in text mode
+ * lib/utils.c (utils_fp_name): Shorten the output
+ * lib/utils.c (ck_fread, ck_fwrite, ck_fflush): Clearerr
+ after printing an error.
+ * lib/utils.c (ck_fclose): Work on stdout as well if stream == NULL
+ and flush before closing to check for errors
+
+2002-05-30 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/compile.c (compile_program): Implement W
+ * src/execute.c (execute_program): Likewise
+
+2002-04-23 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/sed.c (usage, main): Parse -s
+ * src/sed.h (separate_files): New variable
+ * src/execute.c (separate_files): New variable
+ * src/execute.c (reset_addresses): New function to make range
+ addresses work separately on each file when using in-place
+ editing
+ * src/execute.c (execute_program): The `n' and `N' use test_eof
+ so that the script restarts at end of file, not at end of input
+ * src/execute.c (test_dollar_EOF): Make $ work separately
+ on each file when using -s; renamed to test_eof
+
+2002-02-28 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/sed.h (struct sed_cmd): exit_status -> int_arg
+ * src/compile.c: Likewise
+ * src/execute.c: Likewise
+
+ * src/compile.c (compile_command): Parse `l' like
+ `q' and `Q'; default for int_arg is -1
+ * src/execute.c (do_list): New argument, used instead
+ of lcmd_out_line_len
+ (execute_program): Interpret int_arg for the `l' command;
+ return 0 for `q' and `Q' if int_arg is -1
+
+ * src/fmt.c: New file, looted from GNU textutils
+ * src/compile.c: Parse `L'
+ * src/execute.c: Execute `L'
+
+2002-02-14 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/execute.c (str_append_modified): Fixed a stupid
+ bug (stop condition was *start == *end, meant to be
+ start == end)
+
+2002-02-05 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/utils.c: Added directory parameter to
+ temp_file_template
+ * lib/utils.h: Adjusted
+ * src/execute.c: Adjusted
+
+2002-01-29 Paolo Bonzini <bonzini@gnu.org>
+
+ * src/compile.c (mark_subst_opts): Signal an error if
+ there are multiple g or p options
+ * src/compile.c (compile_program): Raise appropriate
+ error if second string in y command is longer than
+ first (used to be "excess junk after command")
+
+2001-12-31 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/getline.c: Strip the terminating \r under Windows
+ or MS-DOS.
+
+ * testsuite/xemacs.sed, testsuite/xemacs.inp,
+ testsuite/xemacs.good: Submitted by John Fremlin
+ (john@fremlin.de)
+
+2001-12-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (do_subst): Flags in optimized s/^xx/
+ commands were discarded (see the change below)
+
+2001-12-19 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (resize_line): Limit inactive space to two
+ thirds of a buffer
+ * sed/execute.c (line_init): Initialize buf->active
+ * sed/execute.c (str_append, str_append_modified, line_copy,
+ do_list, do_subst, execute_program, process_files): Operate
+ on active space
+
+ * sed/execute.c (do_subst): Optimize s/^xx// by making a part
+ of the buffer inactive and s/xx$// by truncating it.
+ * sed/execute.c (execute_program): Optimize D by making a part
+ of the buffer inactive
+
+ * testsuite/uniq.sed, testsuite/uniq.inp, testsuite/uniq.good:
+ added to test P and D commands.
+ * testsuite/fasts.sed, testsuite/fasts.inp, testsuite/fasts.good:
+ added to test the new optimization done on the `s' command.
+
+2001-12-17 Paolo Bonzini <bonzini@gnu.org>
+
+ * testsuite/dc.inp: Also compute Easter of 2002 :-)
+
+ * sed/execute.c [!HAVE_FCHMOD]: Don't chmod the output file
+ if working in-place
+
+2001-11-12 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/sed.h (struct sed_cmd): a1 is a pointer too
+ * sed/compile.c: Likewise
+ * sed/execute.c: Likewise
+
+ * sed/compile.c: Use obstacks
+ * sed/execute.c: Likewise
+
+2001-11-09 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (mark_subst_opts): Parse option `e',
+ preserve two occurrences of the `e' and `p' options.
+ * sed/execute.c (do_subst) [HAVE_POPEN]: Interpret option
+ `e' (evaluate, like Perl's but uses Bourne shell).
+ * sed/sed.h (struct subst): Add an `eval' flag.
+
+ * sed/compile.c (compile_program): Compile command `e'
+ like `c'.
+ * sed/execute.c (execute_program): Execute command `e'.
+
+2001-09-25 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (get_writefile) [!POSIXLY_CORRECT]:
+ support /dev/stdout
+ * sed/execute.c (open_next_file, closedown): Support
+ in-place editing
+ * sed/execute.c (backup_file_name): New function to
+ support in-place editing
+ * sed/main.c (usage, main): Parse -i.
+ * sed/utils.c: Moved to lib directory
+
+ * lib/utils.c (temp_file_template): New function.
+ * sed/utils.h: Declared temp_file_template.
+
+2001-09-05 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/execute.c (do_subst): `baaac', if passed through
+ s/a*/x/g, gave `xbxxcx' rather than `xbxcx' (because an
+ empty string matched before the `c'). Fixed.
+
+ * sed/execute.c: Removed mmap support, I/O is done using
+ getline (slower but more bug-proof).
+ * sed/utils.c: Likewise.
+ * lib/getline.c: New file
+
+2001-03-22 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (normalize_text) [POSIXLY_CORRECT]: Enable
+ escapes in modes other than BRE.
+
+2001-03-21 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (normalize_text): Support \XXX in Perl mode,
+ \oXXX in non-Perl mode.
+
+2001-03-18 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (compile_program): Fixed missing break when
+ compiling 'q' and 'Q'.
+
+ * sed/compile.c (check_final_program): Removed now spurious
+ call to compile_regex
+ * sed/regex.c (compile_regex): Don't track the last compiled
+ regex
+ * sed/regex.c (execute_regex): Track here the last compiled
+ regex
+
+2001-03-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (setup_replacement): Support \[lLuUE] like
+ Perl and vi.
+ * sed/compile.c (new_replacement): Accept new parameter
+ to support \[lLUuE].
+ * sed/sed.h (enum replacement_types): New declaration
+ * sed/execute.c (do_subst): Use new function str_append_modified
+ to apply the changes required via \[lLUuE].
+ * sed/execute.c (str_append_modified): New function
+
+2001-03-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (setup_replacement): Count the number of backreferences
+ that the RHS needs
+ * sed/regex.c (compile_regex): Check if there is a sufficient number
+ of backreferences (new argument needed_sub replaces nosub)
+ * sed/compile.c (compile_address, compile_program,
+ check_final_program): Callers adjusted
+
+2001-02-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (compile_program): Added `Q' (quit without output)
+ * sed/execute.c (execute_program): Ditto
+
+ * sed/compile.c (compile_program): Fill in exit_status for `q' and `Q'
+ * sed/execute.c (execute_program): Return -1 for `go on', 0..255
+ to set the exit status
+ * sed/execute.c (process_files): Interpret new convention for
+ execute_program, return sed's exit code
+ * sed/sed.c (main): Return process_files's exit code
+ * sed/sed.h (struct sed_cmd): Declare exit_status
+
+2001-01-07 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c (compile_program): Added `T' (branch if failed)
+ * sed/execute.c (shrink_program, execute_program): Ditto
+
+2001-01-04 Paolo Bonzini <bonzini@gnu.org>
+
+ * testsuite/Makefile.am: Use automake's implementation
+ of `make check'. Removed the test targets
+ * testsuite/Makefile.tests: Moved the test targets here
+ (new file).
+ * testsuite/runtest: New file
+
+ * testsuite/Makefile.tests: `khadafy' test uses EREs.
+
+ * testsuite/spencer.inp: Removed the ^* test
+
+ * testsuite/spencer.sh: Don't rely on awk; more comments too
+
+2001-01-03 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c(snarf_char_class) [REG_PERL]: Don't parse
+ `\n' specially
+ * sed/compile.c(match_slash) [REG_PERL]: Ditto
+
+ * sed/compile.c(read_text) [REG_PERL]: Support [xX] modifiers
+ * sed/compile.c(mark_subst_opts) [REG_PERL]: Ditto
+
+2000-12-21 Paolo Bonzini <bonzini@gnu.org>
+
+ * lib/snprintf.c [BOOTSTRAP]: Don't include stdio.h
+ * lib/strerror.c [BOOTSTRAP]: Don't include stdio.h
+ * sed/execute.c [!HAVE_ISATTY]: Don't buffer stdin
+
+2000-12-11 Paolo Bonzini <bonzini@gnu.org>
+
+ * sed/compile.c(mark_subst_opts): Support [mMsS] flags
+ * sed/compile.c(read_text): Support [MS] flags for
+ addresses
+ * sed/regex.c(compile_regex): Support arbitrary flags for
+ regncomp.
+
+ * sed/regex.c(compile_regex) [REG_PERL]: Don't call
+ normalize_text.
+
+2000-12-08 Paolo Bonzini <bonzini@gnu.org>
+
+ * basicdefs.h: Moved here from the `sed' subdirectory.
+
+ * configure.in: Removed crap to pick a regex engine.
+ Added snprintf to the AC_REPLACE_FUNCS call.
+
+ * lib/snprintf.c: New file.
+
+ * sed/regex.c(compile_regex): Use regncomp
+ * sed/regex.c(match_regex): Use regexec2
+
+ * sed/compile.c(compile_program): Implemented the `v' command.
+
+ * sed/sed.c(main): Implemented the `r' and `R' options
+
+ * sed/sed.h: Replaced use_extended_syntax_t with
+ extended_regexp_flags to support Perl regular expressions.
+
+ * sed/execute.c(open_next_file): Don't mmap stdin (because
+ we cannot seek into it, so a redirected stdin's contents
+ would not be "eaten" by sed)
+
+Mon Aug 30 23:40:08 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.02.80 released
+
+ * sed/execute.c(do_subst): lib/regex.c(re_search_2) seems to
+ want one extra backreference register; humor it.
+
+ * sed/regex.c(compile_regex): work around some odd assumptions
+ that lib/regex.c(re_compile_pattern) makes about our desired
+ RE syntax.
+
+ * configure.in: tweaked version to 3.02.80; added new entries
+ to the ALL_LINGUAS definition.
+
+ * doc/sed.1, doc/sed.texi, BUGS: explicitly request the output
+ of sed --version in bug-reporting instructions.
+
+ * doc/sed.texi: the old "informal seders list" is dead; document
+ the new sed-users mailing list instead (under Other Resources).
+
+
+Thu Aug 19 23:27:54 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.h: Add explicit #include of "regex-sed.h" (rather
+ than relying on parent file doing so); change the "cmd_regex"
+ member of sed_cmd: make it a pointer (instead of a struct),
+ and change its name to cmd_subst; add prototypes for newly
+ exported functions bad_prog(), normalize_text(), compile_regex(),
+ match_regex(), and release_regex(); drop rx_testing variable.
+
+ * sed/compile.c: move the compile_regex() function to regex.c;
+ export bad_prog() and normalize_text() functions; eliminate the
+ rx_testing debris; rename the NOLEAKS symbol to more descriptive
+ DEBUG_LEAKS; make cmd_regex to cmd_subst fixes (see above);
+ make use of newly abstracted release_regex() function.
+
+ * sed/execute.c: abstract out the regex matching to
+ regex.c:match_regex(); NOLEAKS to DEBUG_LEAKS change;
+ cmd_regex to cmd_subst structure member name change.
+
+ * sed/execute.c(do_subst): use re_registers/regoff_t instead of
+ regmatch_t to hold the backreference registers, make "offset"
+ always be relative to the beginning of the string (rather than
+ a delta from "start"), defer some matching bookkeeping (e.g.,
+ not_bol_p) to match_regex().
+
+ * sed/sed.c(main): lose rx_testing variable; NOLEAKS
+ (aka DEBUG_LEAKS) code attempting to release
+ _nl_current_default_domain is problematic, so omit it.
+
+ * sed/regex.c: new file --- abstracts out the interface to the
+ regex engine so that less conditional code is required in
+ compile.c and execute.c, and so as to make a change of engine
+ easier; implements compile_regex() (which looks an awful lot
+ like the one that used to live in compile.c), match_regex(),
+ and (if DEBUG_LEAKS is set) release_regex().
+
+Sun Apr 18 04:40:46 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c(main): conditionalize calls to setlocale() and
+ textdomain() to only occur if their support is needed/wanted.
+
+Sun Apr 18 03:01:46 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ * bootstrap.sh: "foo || bar && baz" was not grouping like I
+ expected ("foo || (bar && baz)") under at least one shell,
+ so change the test for a pre-existing config.h file to an
+ if statement.
+
+ * bootstrap.sh: added -DUSE_REGEX_GNU_H option to the
+ compiler invocation, to ensure that we get a usable
+ regex library included.
+
+Sun Apr 18 02:59:42 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.h, sed/utils.c: conditionalized inclusion of <libintl.h>
+ to occur only if ENABLE_NLS is defined.
+
+Sun Apr 18 01:48:45 PDT 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(xofa,normalize_text,convert_number): change
+ name of xofa() function to convert_number(); change semantics
+ to do all of the work of the text->number conversion.
+
+ * sed/compile.c(normalize_text): add new \dDDD decimal
+ and \oOOO octal escapes.
+
+Sun Mar 28 21:05:07 PST 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c(main): if NOLEAKS is set, free up a word that
+ the call to textdomain() allocated.
+
+ * sed/execute.c(read_file_line): plug up (minor) memory leak:
+ if buffer.alloc==0 we may have malloc()'d 1 byte anyway,
+ so be sure to FREE(buffer.text) before calling line_init();
+
+Fri Mar 26 16:52:10 PST 1999 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(match_slash): somewhere between 3.02
+ and 3.02a we lost the ability to use a newline as
+ the s/// delimiter; restore this ability.
+
+ * sed/compile.c(compile_regex): forget about trying
+ to cache the compiled form of the last RE --- it
+ causes more problems than it's worth. We now only
+ cache the source form.
+
+ * testsuite/help.good: update to reflect output containing
+ new options.
+
+Sun Dec 6 00:51:23 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/utils.c(ck_fwrite): fix i18n bug of using a printf
+ fragment of "item%s" to handle plural text.
+
+Mon Nov 23 11:03:40 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * doc/sed.1, doc/sed.texi: ran ispell over these
+ files to catch the more obvious typos...
+
+Sun Nov 1 00:09:07 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/execute.c(do_list): make a `lcmd_out_line_len'
+ (--line-length) of zero mean "infinite length",
+ i.e., "never wrap".
+
+Sat Oct 31 23:06:50 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c(match_an_address_p,process_files),
+ compile.c(compile_program): back out the "zero-address"
+ changes of 1998-09-27. It was a neat idea, but there are
+ too many dark corners which don't work well. The
+ special code for handling line ranges starting at
+ address zero (from 1998-08-31) is still there though:
+ this seems to work fine with no surprises.
+
+Sat Oct 31 22:18:59 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c, sed/sed.h, sed/execute.c: added new
+ `lcmd_out_line_len' variable. (Idea suggested by
+ Carlos J. G. Duarte <l38076@alfa.ist.utl.pt>.)
+ Also added ATOI macro (which uses strtoul() if available,
+ with fall-back to atoi()).
+ * sed/sed.c(main): attempt to use COLS environment variable
+ to set a reasonable `lcmd_out_line_len'; added -l/--line-length
+ command-line options to set the new `lcmd_out_line_len' flag.
+ * sed/sed.c(usage): documented new -l/--line-length options.
+ * sed/execute.c(do_list): use `lcmd_out_line_len' variable
+ instead of `LCMD_OUT_LINE_LEN'.
+ * sed/execute.c: deleted now obsolete LCMD_OUT_LINE_LEN define.
+ * configure.in: added strtoul to the AC_CHECK_FUNCS call.
+
+Sat Oct 31 21:37:17 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c, sed/sed.h, sed/execute.c: added new `force_unbuffered'
+ flag. (Idea suggested by Frank Strauss <strauss@escape.de>.)
+ * sed/sed.c(main): added -u/--unbuffered command-line options
+ to set the new `force_unbuffered' flag.
+ * sed/sed.c(usage): documented new -u/--unbuffered options.
+ * sed/execute.c: changed the name of the `is_tty' flag in struct
+ input to a more generic `no_buffering'; also removed HAVE_ISATTY
+ conditional on this member.
+ * sed/execute.c(slow_getline): removed HAVE_ISATTY conditional
+ compilation of this function.
+ * sed/execute.c(output_line): if force_unbuffered is set,
+ then force a fflush() even if writing to stdout.
+ * sed/execute.c(open_next_file): added handling of the
+ new `force_unbuffered' flag so that slow_getline()
+ will always be used for input.
+ * sed/execute.c(read_file_line): changed the (conditionally
+ compiled) test of `input->is_tty' to (unconditionally)
+ use the new spelling `input->no_buffering'.
+
+Thu Oct 15 12:08:09 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: deleted AC_ARG_PROGRAM call; this is already
+ done for us by AM_INIT_AUTOMAKE, and we were winding up
+ with a doubled-transform.
+
+Sun Sep 27 01:42:42 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * compile.c(compile_program): remove special-case code for matching
+ address range with a `0' beginning.
+
+ * compile.c(compile_address): change default addr_number to
+ be a pragmatically impossible countT value, instead of zero.
+
+ * execute.c: spell macro REGNEXEC() unconditionally instead of
+ playing with conditional definition of regnexec() macro.
+
+ * execute.c(match_an_address_p): added third argument (and changed
+ callers in match_address_p). Added special code to ignore
+ non-numeric matches when processing "line zero".
+
+ * execute.c(process_files): added a "line zero" pass through the
+ commands script.
+
+Sun Sep 27 00:20:53 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * compile.c(xofa,normalize_text): new functions.
+ * compile.c(compile_regex): cache last_compiled_re (with its
+ associated flags); add POSIXLY_CORRECT behavior for empty RE.
+ Make use of the new normalize_text() function.
+ * compile.c(setup_replacement): Make use of the new normalize_text()
+ function.
+
+Sat Sep 26 22:59:13 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * lib/regex-gnu.h: added missing prototype for regncomp().
+
+Mon Sep 14 20:47:23 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c(main): use EXIT_SUCCESS instead of 0, in case
+ we are built on a system (such as VMS) where EXIT_SUCCESS
+ is distinct from 0.
+
+Wed Sep 9 22:17:28 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/Makefile.am: added -I../intl to the INCLUDES line; if we are
+ building in a directory outside the source tree and the system
+ we are building on does not have a <libintl.h> header, then
+ the build was failing, because libintl.h is a build-time
+ constructed source file.
+
+ * configure.in: tweaked version to be 3.02b.
+
+Wed Sep 9 19:28:14 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.02a released
+
+ * sed/compile.c(mark_subst_opts,read_label,compile_program):
+ wherever we accept a ; as a command terminator, also allow a } or
+ a # to appear. (This allows for less cluttered-looking scripts,
+ such as: sed '/foo/{x;G}' (instead of: sed '/foo/{x;G;}').)
+
+Wed Sep 9 18:17:07 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(compile_regex): use regncomp() instead
+ of regcomp(), so that a script with NULs in its REs
+ will work in the expected manner.
+
+ * sed/compile.c(ADDNUL,REGNCOMP): added support macros
+ for above.
+
+ * lib/regex.c(regncomp,regcomp): added regncomp() and
+ made regcomp() a simple wrapper function.
+
+Mon Aug 31 21:48:30 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c(compile_program): if the first address of
+ a range is the number 0 (or a 0~N sequence), start
+ out in the "a1_matched" state. This allows one
+ to match an initial chunk of a file without undue
+ convolutions for handling the case where the match
+ for the end of the sequence happens to be the first
+ line.
+
+Sun Aug 16 03:34:25 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(snarf_char_class,match_slash): simplify
+ handling of "premature newline" error. Also, get the
+ line number right in the error message if we encounter
+ a "premature newline" during char-class snarfing.
+
+Sun Aug 16 02:59:20 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c: added N_() markers and corresponding gettext()
+ (er, _()) calls.
+
+ * Merged in i18n contribution from Erick Branderhorst
+ <Erick.Branderhorst@asml.nl>. His ChangeLog entry
+ for the changes I've incorporated so far:
+
+1998-07-24 Erick Branderhorst <Erick.Branderhorst@asml.nl>
+ * configure.in (ALL_LINGUAS, AM_GNU_GETTEXT): nl
+ * sed/{sed.h,utils.c}: #include <libintl.h> #define _(String)
+ gettext (String)
+ * sed/sed.c: #include <locale.h>
+ * po/POTFILES.in: sed/{compile,execute,sed,utils}.c
+ * run gettextize -f
+ * acconfig.h: #undef LOCALEDIR ENABLE_NLS HAVE_CATGETS
+ HAVE_GETTEXT HAVE_LC_MESSAGES HAVE_STPCPY
+
+Fri Aug 14 13:52:57 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * Merged code from 3.02 with a branched development
+ tree from late May; the following (out-of-order)
+ changelog entry is from the branched tree.
+
+Sat May 30 12:23:16 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c, sed/execute.c: added (conditional on NOLEAKS macro)
+ code to free all dynamically allocated memory.
+
+ * sed/sed.c, sed/compile.c, sed/execute.c: much shuffling
+ of code --- ordered functions such that no forward
+ declarations are necessary, and placed all static prototypes
+ immediately before the actual function definition.
+ This accomplished two things: first, I find the new ordering
+ a more natural way to read the code than the previous
+ ordering, and second, the new ordering gives the compiler
+ a better opportunity to discover inlining possibilities.
+ (The odd "prototype declaration+old-style definition"
+ style is used because I feel it is the least ugly way
+ of supporting K&R1 C while still getting the benefit of
+ prototypes when they are available.)
+
+ * sed/basicdefs.h: added MEMCPY() macro to hide the VCAST()s
+ that ought to be used with memcpy().
+
+ * sed/execute.c: Change calls to memcpy() to go through the
+ new MEMCPY() macro. Various prototypes: elide variable name
+ if it does not add any human-useful documentary information
+ to the bare type.
+
+ * sed/sed.c(main): Updated calls to compile_string() to add third
+ (length) argument. Changed call to obsolete close_all_files()
+ to a call to the new finish_program().
+
+ * sed/sed.c(map_file): Attempt to clean-up how "size" gets
+ cast and tested; remove spurious S_ISREG test (just let
+ mmap() fail if it doesn't support the underlying file type).
+
+ * sed/sed.c: Deleted old RX library stub declarations.
+
+ * sed/sed.c(map_file,unmap_file): added VCAST()s to the
+ mmap()/munmap() calls.
+
+ * sed/utils.c(ck_fclose): added support for ANSI C
+ functionality where passing a NULL argument means
+ to fclose() _all_ open streams. (Well, almost.
+ Only closes streams which were previously ck_fopen()ed,
+ as I don't care to figure out how to autoconf-detect
+ whether fclose(NULL) is properly supported on a given
+ platform.)
+
+ * sed/sed.h: Renamed `struct text_buf' member `text_len'
+ to `text_length'. Abstracted out `enum addr_types'
+ from `struct addr'; added new enum types num2,step,step_mod;
+ renamed mod to num_mod. De-unionized the regex,number,
+ {modulo-offset/step} components of `struct addr', in
+ anticipation of new features. Changed type of `a2' member
+ of `struct sed_cmd': now a pointer to save space.
+ Abstracted out `struct replacement' from `struct subst'.
+ Cleaned up declaration of `x' union of `struct addr'.
+ Fixed prototype for compile_string(). Replaced prototype
+ for old close_all_files() with one for new finish_program().
+
+ * sed/sed.h, sed/compile.c, sed/execute.c: changed to
+ simplify the data structures used for branches and
+ command blocks: simplified `struct vector'; made
+ `struct label' local to compile.c; `struct sed_cmd'
+ was modified to support a simpler design for branches
+ and blocks.
+
+ * sed/execute.c: Conditionally added ADDNUL() macro so that
+ the function call overhead is only incurred if nul_append()
+ _must_ be called. Made some commentary edits, including
+ typo fixes.
+ * sed/execute.c(resize_line): changed semantics of "len" argument
+ from "additional length" to "target length"; made
+ INITIAL_BUFFER_SIZE a minimum allocation length.
+ * sed/execute.c(str_append): adjusted to new resize_line()
+ semantics.
+ * sed/execute.c(line_copy): use FREE()+MALLOC() instead of
+ REALLOC() to avoid unnecessary copying of old text; add the
+ "try doubling first" allocation heuristic (just like
+ resize_line() does).
+ * sed/execute.c(line_exchange): new function.
+ * sed/execute.c(nul_append): make whole function (not just its
+ body) conditional on HAVE_REGNEXEC macro; adjust to new
+ resize_line() semantics.
+ * sed/execute.c(read_mem_line): use str_append() instead of
+ custom in-line code; compensate for new default of
+ "line.chomped = 0" in read_pattern_space() by setting
+ "line.chomped = 1" where appropriate.
+ * sed/execute.c(read_file_line): use different trigger to
+ determine that "buffer" is uninitialized, and do a full
+ initialization if required; use str_append() instead of custom
+ in-line code in two places; compensate for new default of
+ "line.chomped = 0" in read_pattern_space() by setting
+ "line.chomped = 1" where appropriate.
+ * sed/execute.c(output_line): don't bother calling ck_fwrite()
+ if length==0.
+ * sed/execute.c(release_append_queue): new function.
+ * sed/execute.c(dump_append_queue): use release_append_queue()
+ instead of in-line equivalent.
+ * sed/execute.c(read_pattern_space): conditionalize call to
+ dump_append_queue() for alleged performance reasons; changed
+ default "line.chomped" value to more common "1", and added an
+ assignment of "0" where this made a difference.
+ * sed/execute.c(match_an_address_p): deleted "is_addr2_p"
+ argument; reorder cases to match order in enum declaration; add
+ cases for new "addr_is_num2", "addr_is_step", and
+ "addr_is_step_mod" address types; alter nul_append() call to be
+ through ADDNUL() macro; fix to new struct member and enum
+ spellings in (formerly addr_is_mod) addr_is_num_mod case.
+ * sed/execute.c(match_address_p): remove obsolete third argument
+ to calls to match_an_address_p(); alter references to sed_cmd
+ member a2 to reflect new pointer status; add new support for
+ a2->addr_type addr_is_step and addr_is_step_mod cases.
+ * sed/execute.c(do_subst): add NOLEAKS support logic; use
+ ADDNUL() wrapper to nul_append(); simplify replacement
+ expansion by using the new "struct replacement" data structure;
+ use line_exchange() function instead of custom in-line code.
+ * sed/execute.c(process_files): added NOLEAKS code.
+ * sed/execute.c(execute_program): updated implementations
+ of the `{', `}', `:', `b', and `t' commands; modified
+ `c' command gratuitously; fixed potential memory
+ overrun in `D' command. Simplified how nonstandard
+ `loop increments' work. Use line_exchange() instead of
+ custom in-line code in 'x' case.
+ * sed/execute.c[EXPERIMENTAL_DASH_N_OPTIMIZATION conditional
+ code]: various modifications intended to keep this
+ code in sync with the new changes, but the code still
+ retains its previous bugs.
+
+ * sed/compile.c: use "exit(EXIT_FAILURE)" instead of "exit(1)",
+ just in case we get compiled under VMS.
+ * sed/compile.c: Change type of prog_info.base to decrease needs
+ for casting; then eliminated the casts in question ;-).
+ * sed/compile.c: Added struct sed_label (moved from sed.h, then
+ modified).
+ * sed/compile.c: Removed "readit_p" flag from struct fp_list.
+ * sed/compile.c: Added module-global "blocks" variable.
+ * sed/compile.c: Extracted more error-message constant strings
+ to named variables.
+ * sed/compile.c(check_final_program): updated to
+ reflect new data structures and use new functions.
+ Added call to compile_regex() to release unneeded
+ memory.
+ * sed/compile.c: deleted obsolete new_vector() function;
+ abstracted new read_label() function; abstracted new
+ release_label() function; added new `blocks' module-static
+ variable.
+ * sed/compile.c(compile_program): updated implementations
+ of the `{', `}', `:', `b', and `t' commands; modified
+ initialization from NULL vector.
+ * sed/compile.c(compile_regex): added mechanism to
+ release memory consumed by the cached `last' RE.
+ * sed/compile.c(setup_jump,setup_label): updated
+ name (from setup_jump to setup_label) and prototype;
+ changed body to reflect data structure changes.
+ * sed/compile.c: Add OPEN_BRACE and CLOSE_BRACE macros for better
+ "vi" editing behavior.
+ * sed/compile.c(compile_filename,read_filename,get_writefile):
+ Replaced function compile_filename() with more orthogonal functions
+ read_filename(), get_writefile().
+ * sed/compile.c(compile_regex): Added ability to free the remembered
+ "last RE" in compile_regex (for benefit of "NOLEAKS" code).
+ * Made adjustments dictated by the change to struct sed_cmd which made
+ the a2 member a pointer-to-addr instead of an addr.
+ * sed/compile.c(setup_jump,read_label,setup_label,release_label):
+ Added functions read_label(), setup_label(), release_label(); deleted
+ function setup_jump().
+ * sed/compile.c(new_replacement,setup_replacement,release_replacement):
+ new functions.
+ * sed/compile.c: Adjusted to new spelling of text_buf member
+ ("text_length" instead of "text_len").
+ * sed/compile.c(new_vector): deleted function. (Due to new handling
+ of blocks, only one instance remained, and that one was just as
+ clear in-lined.)
+ * sed/compile.c(compile_string): Added third argument; it now
+ takes a counted string instead of a NUL-terminated string.
+ * sed/compile.c(compile_file): added variable "map_base" to
+ compensate for new type of prog_info.base.
+ * sed/compile.c(check_final_program): reflect new style of
+ handling blocks and struct sed_label.
+ * sed/compile.c(close_all_files,finish_program): replaced function
+ close_all_files() with more generic finish_program().
+ * sed/compile.c(read_text): added new feature: if first non-blank
+ character after the {a,i,c} command character is not "\", then
+ use the trailing text on that line as the (first) line of text.
+ Also added code conditional on NO_INPUT_INDENT to support the
+ "feature" of stripping leading blanks from each input line; I
+ do not read POSIX as permitting this behavior, nor do I think
+ it is a good idea, so it is disabled by default, but some have
+ argued that this blank-stripping is the "correct" behavior, so
+ I offer them the option of building their sed that way.
+ * sed/compile.c(compile_address): added xxx,+n and xxx,~n addressing;
+ simplified code.
+ * sed/compile.c(compile_program): added BAD_PLUS error detection;
+ adjusted to new cur_cmd->a2 pointer status; added addr_is_num2
+ detection; deleted pointless "a2->addr_number < a1.addr_number"
+ check (addr_is_num2 semantics handle this just fine); updated
+ code for '{', '}', ':', 'b', and 't' to reflect new design
+ of branch handling (including making use of new functions
+ related to the new design); added support for feature already
+ mentioned in read_text() where {a,i,c} commands are able to have
+ their text start on the same line as the command; changed some
+ error messages (hopefully for the better); localized variables
+ specific to individual commands (particularly 's' and 'y');
+ made use of new setup_replacement() function in 's' command.
+
+Mon Aug 10 19:58:49 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * doc/sed.texi, doc/sed.1: sedtut10.txt is apparently dead.
+ Deleted references to it and added a pointer to
+ http://seders.icheme.org/tutorials/. (Pointed out by
+ Joerg Heitkoetter <joerg@de.uu.net>.)
+
+Sat Aug 8 18:11:57 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * djgpp/config.btm: per request by Michel de Ruiter
+ <mdruiter@cs.vu.nl>, added "%1" to "%9" parameters.
+
+Mon Aug 3 11:44:55 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * doc/sed.texi: fix a couple of typos. (Submitted by
+ Alan Modra <alan@spri.levels.unisa.edu.au>.)
+
+Sat Aug 01 17:49:06 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.02 released
+
+ * configure.in: Because of code change in 3.01a, bump the
+ minor revision number for the release (now 3.02).
+
+Sun Jul 26 16:07:55 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01a released
+
+ * sed/compile.c(snarf_char_class): the POSIX char-class
+ recognition loop forgot to update its concept of "prev"
+ as the loop progressed.
+
+ * testsuite/Makefile.am: The dependency of version.good
+ on [testsuite/]Makefile introduced in the previous
+ release was botched -- it referred to "Makefile"
+ as "$(srcdir)/Makefile", which of course doesn't work
+ if you aren't building in the source tree.
+
+ * djgpp/Makefile.am: add forgotten "config.btm" EXTRA_DIST
+ member.
+
+ * configure.in: update version.
+
+Tue Jul 21 06:04:42 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01 released
+
+ * configure.in: mark as release version!
+
+ * Makefile.am: add BUGS and THANKS to the EXTRA_DIST target.
+
+ * testsuite/Makefile.am: add dependency of version.good
+ on [testsuite/]Makefile.
+
+Mon Jul 20 12:38:10 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * djgpp/config.btm: New file to support the 4DOS alternative
+ to command.com. (Submitted by Eli Zaretskii on behalf of
+ an anonymous 4DOS user.)
+
+Fri Jul 17 00:36:34 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta18 released
+
+ * djgpp/config.sed: my "tweak" in beta17 was too
+ hastily considered. Back it out.
+
+ * configure.in: update to beta18.
+
+Wed Jul 15 01:02:15 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta17 released
+
+ * djgpp/config.sed: tweak/simplify s,,, commands at end.
+
+ * configure.in: update to beta17.
+
+1998-07-14 Eli Zaretskii <eliz@is.elta.co.il>
+
+ * djgpp/config.sed: Edit all the occurrences of = in the context of
+ --option=value, including in the help messages, into
+ --option:value, but leave DOS-style d:/foo/bar file names intact.
+
+ * djgpp/config.bat: Use --srcdir:foo instead of --srcdir=foo.
+
+ * testsuite/Makefile.am (help, version): Remove temporary files
+ explicitly, don't use shell wildcards, so it works under DOS 8+3
+ limits.
+
+Thu Jul 9 13:06:00 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta16 released
+
+ * djgpp/config.sed: tweak the configure script to use :
+ instead of = for --with-foo=bar option parsing, to
+ work around problems with how command.com handles =s.
+
+Wed Jul 8 16:51:43 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * djgpp/config.bat: correct inappropriate behavior that I
+ introduced in the beta13 changes (if first argument is
+ a directory, it needs to be handled as the --srcdir).
+
+ * testsuite/version.gin, testsuite/version.good, testsuite/Makefile.am:
+ Add target to automake to automatically update version.good
+ from (new file) version.gin, instead of hand-editing the version
+ number each release.
+
+ * testsuite/Makefile.am: miscellaneous gratuitous tweakage --
+ mainly adding $(RM) commands just because I didn't like
+ leaving the tmp* files from successful runs laying about.
+ Also some editorial comments.
+
+ * configure.in: update to beta16. Added and commented out
+ experiment with AC_OUTPUT() for testsuite/version.good.
+ Added code to properly handle bare (without =xxx)
+ "--with-regex" option.
+
+Sun Jul 5 21:02:16 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta15 released
+
+ * sed/utils.c(ck_fflush), sed/utils.h, sed/execute.c(output_line):
+ add and use new ck_fflush() function.
+
+Sun Jul 5 15:23:47 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(bad_prog): add more detail to error
+ messages about -e strings.
+
+Sun Jul 5 14:29:45 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(mark_subst_opts), sed/execute.c(do_subst):
+ Define better semantics for interaction of the `g' flag
+ with a numeric flag to the s/// command. It used to
+ be that the `g' command seized control; now the first
+ (number-1) matches are skipped and then `g' gets control
+ after that. (It is not clear whether this is a feature
+ sneaking in during late beta, or a bug fix; the changes
+ involved were trivial, so I decided to treat it as a bug
+ fix.)
+
+ * configure.in, testsuite/version.good: update to beta15.
+
+Sat Jul 4 09:54:45 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta14 released
+
+ * sed/basicdefs.h, sed/compile.c, sed/execute.c:
+ per report by "Kaveh R. Ghazi" <ghazi@caip.rutgers.edu>,
+ copied the ISXXX macros from lib/regex.c so that
+ silly machines which require isascii() to be true
+ before the other isXXX() macros are valid will
+ still work.
+
+ * configure.in, testsuite/version.good: update to beta14.
+
+Thu Jul 2 23:46:13 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta13 released
+
+ * configure.in, acconfig.h: set USE_REGEX_GNU_H symbol if we
+ are going to be using lib/regex.c.
+
+ * lib/Makefile.am, lib/regex.h, lib/regex-gnu.h, lib/regex.c:
+ rename lib/regex.h to lib/regex-gnu.h, so that those who
+ choose to use a different regex implementation will not
+ pick-up lib/regex.h when doing "#include <regex.h>".
+
+ * sed/regex-sed.h, sed/Makefile.am, sed/compile.c, sed/execute.c,
+ sed/sed.c: create sed/regex-sed.h which acts as a switch
+ to choose either lib/regex.h or the user-supplied <regex.h>,
+ depending on the value passed to configure's --with-regex=
+ option.
+
+Thu Jul 2 17:22:31 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: if an alternative --with-regex= is given,
+ do an AC_CHECK_FUNCS(regnexec regexec) to ensure that
+ at least one of these functions is available. Also,
+ parallel changes for the default case.
+
+ * sed/execute.c, acconfig.h: retire use of the WITH_REGNEXEC
+ test macro in favor of HAVE_REGNEXEC test macro created
+ by above change.
+
+ * djgpp/config.bat: Play games to handle "install-sh",
+ DOS filename restrictions, GNU makefile default rules,
+ and getting a correct run of "configure" (contributed
+ by Eli Zaretskii <eliz@is.elta.co.il>).
+
+ * djgpp/Makefile.am, testsuite/Makefile.am, testsuite/Makefile.in,
+ Makefile.am, configure.in: Various automake targets
+ (such as distcheck) failed with old configuration.
+ The simplest solution was to just add these .am
+ files. (The testsuite/Makefile.in was just renamed to
+ testsuite/Makefile.am, then various redundant defines and
+ targets were deleted.) (Reported by Erick Branderhorst
+ <Erick.Branderhorst@asml.nl>.)
+
+ * testsuite/dc.good, testsuite/dc.inp: per suggestion from
+ Greg Ubben <gsu@romulus.ncsc.mil>, use base 16 output to
+ exercise even more of the dc.sed script.
+
+ * configure.in, testsuite/version.good: update to beta13.
+
+Sun Jun 28 16:21:02 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta12 released
+
+ * doc/sed.texi: Avoid mixing @code and @samp markups together:
+ they look ugly in Info. Use @url and @email instead of @example.
+ Add indexes. (Basis of changes contributed by Eli Zaretskii.)
+
+ * djgpp/*, Makefile.am: add support for the DJGPP compiler,
+ contributed by Eli Zaretskii <eliz@is.elta.co.il>.
+
+ * dc.sed, testsuite/Makefile.in, testsuite/dc.inp, testsuite/dc.good:
+ added this remarkable script, written and contributed
+ by Greg Ubben <gsu@romulus.ncsc.mil>, both as a work of
+ art for general admiration, and also for use in regression
+ testing.
+
+ * configure.in, lib/Makefile.am: add --with-regex=regexlib
+ option, which overrides the use of lib/regex.c.
+
+ * configure.in, testsuite/version.good: update to beta12.
+
+Fri Jun 12 16:41:48 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta11 released
+
+ * sed/compile.c: add module-static variables first_script
+ (for #n change below) and pending_text (for a/c/i change
+ below).
+
+ * sed/compile.c(compile_file), sed/compile.c(compile_program):
+ Instead of having #n trigger the -n option in *any file*,
+ have #n trigger the -n option only if they are the first
+ two bytes of the first script or script-file.
+
+ * sed/compile.c(compile_string), sed/compile.c(compile_file):
+ clear the first_script variable at end of these functions.
+
+ * sed/sed.h: tease out the struct text_buf declaration from
+ struct sed_cmd, so that a pointer to such can be passed
+ to new sed/compile.c(read_text) function.
+
+ * sed/compile.c(compile_program), sed/compile.c(read_text):
+ Tease out handling of text to a/c/i commands to new
+ read_text() function. Handle (via aid of pending_text
+ variable) texts which span more than one script/script-file
+ option. In particular, restore the ability to have this
+ work: sed -e '1i\' -e 'foo'
+
+ * sed/compile.c(check_final_program): close off any dangling
+ pending_text allocation.
+
+Thu Jun 11 11:17:46 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/execute.c(do_subst): fixed two bugs: s/ */X/g was failing
+ to match the final empty string after the end of the pattern
+ space; and /^foo$/s/o/x/3p was printing, despite the failure
+ to do a substitution.
+
+Fri Jun 5 04:40:24 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: change the AC_ARG_WITH(regnexec, ...)
+ to be the more appropriate AC_ARG_ENABLE(regnexec, ...).
+
+ * configure.in, testsuite/version.good: update to beta11.
+
+Fri Jun 5 00:54:25 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta10 released
+
+ * sed/execute.c: forgot to P_() the prototype and
+ old-style the declaration for bootstrap_memchr()!
+
+Thu Jun 4 18:42:30 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(snarf_char_class): added code to
+ recognize \n or \<newline> sequence within a
+ char-class as the newline character.
+
+Tue Jun 2 11:56:02 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: added check for <sys/types.h> and
+ a AC_ARG_WITH(regnexec,...) check, to simplify use
+ of other regex libraries which have regexec() but
+ not regnexec(), with the corresponding loss of
+ functionality (regexps will not work right against
+ input lines which contain NULs).
+
+ * sed/execute.c: add nul_append() function, a #define
+ for a regnexec() -> regexec() macro (conditional on
+ the lack of the WITH_REGNEXEC symbol), and a couple
+ of calls to nul_append() (in match_an_address_p()
+ and do_subst()) to permit the use of the POSIX standard
+ regexec() function call instead of the suggested
+ regnexec() call.
+
+ * sed/compile.c, sed/execute.c, sed/sed.c: check for
+ <sys/types.h> and include it (before "regex.h") if
+ available. This makes it simpler to use the system's
+ regex library instead of the one in lib/regex.c, should
+ that be desired.
+
+Tue Jun 2 08:41:05 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/basicdefs.h: define VCAST macros to allow sed to
+ compile on systems which predate the definition
+ of "void *", and yet still get feedback about
+ stupid programming errors from systems which *do*
+ know about "void *"s. Also define MALLOC, REALLOC,
+ MEMDUP, and FREE macros to keep under control the
+ degree of code ugliness which would otherwise be
+ introduced in making use of the VCAST macro.
+
+ * sed/compile.c, sed/execute.c, sed/sed.c, sed/utils.c:
+ pervasively use the new VCAST, MALLOC, REALLOC, MEMDUP,
+ and FREE macros wherever appropriate.
+
+ * sed/utils.c, sed/utils.h: correct type of first arguments
+ to ck_fread() and ck_fwrite() to be [const] VOID *.
+
+ * sed/basicdefs.h, sed/execute.c: protect against
+ the rumored systems which stupidly #define __STDC__ 0.
+
+ * testsuite/help.good, testsuite/Makefile.in: make
+ the ``help'' test insensitive to the spelling of
+ the executable's name. Also, enhanced `make clean'
+ target.
+
+ * doc/sed.texi, doc/sed.1: correct documentation of `q'
+ command; fix typos.
+
+ * configure, testsuite/version.good: update to beta10.
+
+Sat May 30 17:28:00 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta9 released
+
+ * Makefile.am: make testsuite a normal SUBDIR.
+
+ * configure.in: discontinue using AC_ISC_POSIX --
+ check for -lcposix library instead; added
+ testsuite/Makefile to AC_OUTPUT list.
+
+ * lib/memmove.c(memmove): fixed wrong sense used
+ for HAVE_BCOPY test.
+
+ * sed/execute.c: checked more specifically for a version
+ of gcc which supports __attribute__ (i.e., >= 2.7).
+
+ * testsuite/*: renamed files to fit 14 char limit.
+
+ * testsuite/Makefile, testsuite/Makefile.in: Makefile
+ renamed to Makefile.in and then modified so that
+ "make -j check" from top directory will work.
+
+ * testsuite/subwrite.sed, testsuite/writeout.sed: changed
+ file name of the "w" command to be consistent with the
+ new naming used in testsuite/Makefile.in.
+
+ * doc/sed.1, doc/sed.texi: fixed some typos, formatting
+ glitches, and poor wordings.
+
+Sat May 30 04:02:29 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: specify that config.h is to be derived
+ from config_h.in in order to avoid the braindead
+ DOS filesystem limitations.
+
+Fri May 29 21:56:30 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(compile_address), doc/sed.texi: gave
+ a better definition to the meaning of N~0 address
+ forms -- N~M addresses now mean that lines match
+ when there exists a non-negative x such that
+ lineno == N+x*M.
+
+Fri May 29 12:07:38 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(compile_address),
+ sed/execute.c(match_an_address_p): update semantics of
+ N~M address form: now N is the first line which will
+ match and M is the step between succeeding matches.
+ If N<M this works out to the same as before, but the
+ new behavior for N>=M seems more useful.
+
+ * doc/sed.1, doc/sed.texi: update documentation of N~M
+ address form; added "Other Resources" node to sed.texi;
+ minor formatting changes to some items in sed.1 with
+ an eye to improving clarity.
+
+ * configure.in, testsuite/version.good: update to beta9.
+
+Sat May 23 20:04:31 HST 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta8 released
+
+ * sed/compile.c(compile_regex): forgot to make last_re be
+ a *copy* of the buffered text in today's earlier fix.
+
+ * sed/execute.c(read_file_line): EOF check was wrong --
+ it forgot to allow for the possibility that we were
+ appending to the end of the ``line'' (instead of merely
+ reading a fresh line).
+
+Sat May 23 18:07:18 HST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c(compile_regex): don't track compiled version
+ of regex -- the modifiers may change. Track the regex
+ source instead. (For "last regex" (aka //) notation.)
+
+ * configure.in, testsuite/version.good: update to beta8.
+
+Sat May 23 16:07:09 HST 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta7 released
+
+ * sed/execute.c: #undef'd EXPERIMENTAL_DASH_N_OPTIMIZATION
+ because its code is buggy.
+
+Tue May 19 17:03:52 HST 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c: label rx library code as such with #ifdefs
+ (instead of just #if 0).
+
+ * sed/compile.c(compile_program): make incremental
+ improvement to the "Unknown command" error message.
+
+Sat May 16 23:16:26 HST 1998 Ken Pizzini <ken@gnu.org>
+
+ * testsuite/Makefile: simplify: get rid of automatic run
+ against system's sed; don't time by default; allow for
+ alternative comparison command.
+
+ * configure.in, testsuite/version.good: update to beta7.
+
+Wed May 13 21:44:28 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta6 released
+
+ * lib/Makefile.am: fix spelling of libsed_a_LIBADD in
+ libsed_a_DEPENDENCIES.
+
+ * configure.in, testsuite/version.good: update to beta6.
+
+Wed May 13 14:38:08 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta5 released
+
+ * sed/execute.c(do_subst): added not_bol_p variable to track when
+ we have iterated past the beginning of the pattern.
+ [Thanks to Jim Meyering <meyering@ascend.com> for the bug report.]
+
+Wed May 13 13:54:04 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/execute.c(bootstrap_memchr): new function. When
+ bootstrapping we don't know if we are on a 64-bit machine,
+ so lib/memchr.c breaks. Supply this (slow) implementation
+ just to get us bootstrapped.
+
+ * bootstrap.sh: add a #define BOOTSTRAP symbol; add -I.
+ for emphasis for the compiles in sed/; be explicit
+ about what files we're bothering to compile.
+
+ * configure.in, testsuite/version.good: update version
+ to beta5.
+
+Wed May 13 06:39:06 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta4 released
+
+ * rename writeout.good? to wrtout?.good and subwrite.good? to
+ subwrt?.good to comply with DOS 8+3 file name restrictions.
+ [Eli Zaretskii <eliz@is.elta.co.il> suggested this to
+ simplify DJGPP ports, and it was easy.]
+
+ * testsuite/Makefile: reflect above name changes.
+
+Wed May 12 21:09:32 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/sed.c(usage): fix non-portable omission of \n\ at end of
+ lines within long string.
+
+ * sed/sed.c(main): remove spurious argument to fprintf() in the
+ 'V'ersion output.
+
+ * sed/execute.c(line_append): embed newline between the two
+ text fragments unconditionally.
+
+ * sed/execute.c(do_subst): change structure assignment to memcpy()
+ (for portability reasons).
+
+ * README.bootstrap: suggest using -w option.
+
+Tue May 12 10:02:37 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: use AC_REPLACE_FUNCS where appropriate.
+
+ * lib/Makefile.am: updated to reflect AC_REPLACE_FUNCS change in
+ configure.in.
+
+ * lib/memchr.c lib/memcmp.c: revert to standard GNU versions.
+
+ * lib/alloca.c: added this missing file.
+
+ * testsuite/version.good: updated for new version identifier.
+
+Mon May 11 18:50:56 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta3 released
+
+ * sed/Makefile.am: fix INCLUDES to work right with VPATH.
+ [Thanks to Jim Meyering <meyering@ascend.com> for the bug report.]
+
+ * sed/sed.c(usage): make --help output more user-friendly?
+
+ * sed/execute.c(execute_program): fix bug in 'x' command introduced
+ in the alleged portability fix of May 9.
+
+ * configure.in: update version to 3.01-beta3.
+
+ * testsuite/version.good, testsuite/help.good: freshen with
+ latest output.
+
+Sat May 9 22:35:45 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta2 released
+
+ * sed/sed.c: add #include <sys/types.h> in HAVE_MMAP
+ block (needed on some machines).
+
+ * lib/memmove.c: #include <memory.h>, if HAVE_MEMORY_H.
+
+Sat May 9 21:29:00 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in: remove dangling references to rx library;
+ added HEADER and FUNC checks for items used by source in
+ lib/.
+
+ * lib/ansidecl.h, lib/memcopy.h, lib/pagecopy.h, lib/string.h
+ lib/memcpy.c, lib/memmove.c: deletes these files. There
+ are still pieces of glibc missing to support these, and
+ it isn't worth the headache right now.
+
+ * lib/memmove.c: de novo, simpler version. Uses bcopy()
+ if available, and slow-but-simple code if not.
+
+ * lib/Makefile.am: remove references to deleted files.
+ Added forgotten reference to memcpy.c. Re-ordered
+ SOURCE entries to reflect dependencies for systems
+ which lack ranlib.
+
+ * sed/basicdefs.h: updated to reflect above changes to lib/,
+ and experience with non-STDC compilers.
+
+ * lib/regex.c: made regerror() function publicly visible.
+
+ * lib/strerror.c: use old-style function declaration.
+
+ * sed/compile.c, sed/execute.c, sed/sed.c, sed/utils.c,
+ sed/sed.h, sed/utils.h: ensure that private definitions of
+ some symbols do not cause problems when #include'ing system
+ headers (mainly by re-ordering the #include directives).
+ (This is particularly an issue for bootstrap.sh runs.)
+
+ * sed/execute.c (execute_program): use memcpy() instead of
+ structure assignment ('x' command), for portability to
+ old compilers.
+
+ * sed/execute.c (slow_getline): use old-style function
+ declaration, with a P_ prototype.
+
+ * sed/sed.c: change the type of the fallback MAP_FAILED
+ definition to work on archaic systems. (Modern systems
+ should be defining it themselves, so the change from
+ void * shouldn't be a problem.)
+
+ * bootstrap.sh, README.bootstrap: actual testing of bootstrap
+ code revealed that I was too optimistic. Redesigned and
+ replaced implementation.
+
+ * testsuite/Makefile: ignore errors from reference-implementation
+ seds that aren't up to snuff.
+
+ * testsuite/help.good, testsuite/version.good: update to
+ current version's output.
+
+Fri May 8 15:08:28 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ *** Version 3.01-beta1 released
+
+ * sed/sed.c (main, usage): once again tweak the --help and
+ --version output to better comply with GNU coding standards.
+
+ * testsuite/help.good, testsuite/version.good: update to
+ reflect above change.
+
+ * doc/sed.texi: fix "Invoking" node's spelling to comply
+ with GNU standards.
+
+Fri May 8 11:43:10 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * doc/sed.1, doc/Makefile.am: wrote (very basic) man page.
+
+Thu May 7 20:40:21 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * lib/Makefile.am, lib/memmove.c, lib/memchr.c, lib/regex.c,
+ lib/memcpy.c, lib/regex.h, lib/memcopy.h, lib/string.h,
+ lib/pagecopy.h, lib/ansidecl.h: grab yet-another-version
+ from gnu.org for baseline and/or edit copyright boilerplate
+ using official lgpl2gpl.sed script. Take care not to
+ lose regnexec() interface or special conditional-compilation
+ code.
+
+Wed May 6 23:35:12 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * lib/regex.c, lib/regex.h: take from grep-2.1 distribution,
+ then trivially added the regnexec() interface.
+
+ * sed/sed.c, sed/compile.c, sed/execute.c: made modifications
+ to work with regex instead of rx.
+
+ * rx/*: deleted directory; the code is just too slow.
+ I think it will be easier to extend regex to fully
+ support POSIX.2 than to tune rx to be reasonable.
+ Even if this supposition is wrong, I'd rather make
+ the 3.01 release with the slightly deficient regex.
+
+ * Makefile.am lib/Makefile.am, sed/Makefile.am: made changes
+ related to the substitution of regex for rx.
+
+ * lib/Makefile.am, sed/Makefile.am: since regex is not a
+ ``compatibility'' module, changed name of library to
+ ``libsed.a''.
+
+ * lib/memchr.c, lib/memcpy.c, lib/memmove.c: add conditional
+ compilation code to leave zero-sized .o file if system
+ already supports the implemented function.
+
+ * testsuite/help.good, testsuite/version.good: brought
+ up-to-date (once again).
+
+ * NEWS, ANNOUNCE: changes to reflect this batch of changes.
+
+Wed May 6 18:40:47 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/execute.c: discovered awful bug in '}' handling:
+ it could read past the end of vec (because `n' was
+ being decremented below zero)! Needed to "continue"
+ instead of "break".
+
+Tue May 5 14:34:38 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * doc/sed.texi, doc/version.texi: wrote some rudimentary
+ texinfo documentation.
+
+ * ANNOUNCE, NEWS, README, README.rx, Makefile.am:
+ more updates for the upcoming beta-release.
+
+ * sed/compile.c, sed/execute.c, sed/sed.c, sed/utils.c,
+ sed/sed.h, lib/strerror.c: update copyright notice text.
+
+Fri May 1 15:41:37 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/execute.c (match_an_address_p, match_address_p): if
+ the second element of an address range is a line number,
+ and that line number is *less than* (or equal to) the
+ current line number, we only match the one line (per
+ POSIX.2, section 4.55.7.1). [Bug discovered as reported
+ in the seders mailing list FAQ.]
+
+ * AUTHORS, NEWS, acconfig.h, configure.in, doc/Makefile.am,
+ lib/Makefile.am, sed/Makefile.am, lib/README,
+ testsuite/help.good, testsuite/version.good:
+ Updated in anticipation of the 3.01-beta1 release.
+ Reorganized development source tree to make creation
+ of a distribution simpler. Most notable changes were
+ to the various Makefile.am files and configure.in, but
+ some minor edits (such as deleting or changing #include
+ directives) have been made in many other source files.
+
+ * bootstrap.sh, README.bootstrap: created a mechanism for
+ creating sed on a system which lacks a working sed.
+
+Thu Apr 16 23:52:11 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed.h, sed.c, execute.c, compile.c: did a spell-check on
+ the comments; fixed several typos.
+
+Thu Apr 16 13:43:01 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c (do_subst): fixed bug where the "replaced" flag
+ was being set to one inappropriately when at least one
+ but fewer than sub->numb matches of the regexp were found.
+ (Thanks to Simon Taylor <staylor@hermes.iaccess.com.au>
+ for the bug report.)
+
+Wed Apr 15 11:35:31 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed.h, sed.c, compile.c, execute.c: having a concern that
+ a cast was being done inappropriately, and realizing that
+ there is no quick way to locate all casts in a program, I
+ went through and marked all casts with a simple macro.
+ Now it is a simple matter to locate the casts, and it is
+ also a simple matter to turn off casts for a lint session
+ (if it should be desired).
+
+Wed Apr 15 10:29:21 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * compile.c, sed.c: redo compile phase so that brace
+ expressions can be spread across multiple files.
+ For example:
+ printf '{' >a; printf 'l;d' >b; printf '}' >c
+ sed -f a -f b -f c foo
+ will now compile (and work), instead of complaining
+ about an unmatched '{'. The mess created in compile.c
+ allowed a little simplification to the command-line
+ processing of "-e" options in sed.c.
+
+ sed.h: added (opaque) err_info member to struct vector;
+ added comments to the members of struct vector.
+
+Wed Apr 14 23:50:50 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed.h, sed.c, compile.c, execute.c: added types countT and
+ flagT in order to clarify what various "int"s were doing.
+ Also makes it easy to change the type used for counts
+ (for example, to "unsigned long long") if desired, although
+ there are still some gotchas (such as the printf() format
+ for the '=' command).
+
+Tue Apr 14 17:34:54 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c (execute_program, process_files, count_branches,
+ shrink_program): Added a first attempt at program optimization.
+ We now can quit early if we are running with the "-n"
+ and all of the commands are known to be valid only for
+ lines less than the current line. Thus the "sed" in
+ "foo | sed -n 1,2p" will read three lines, printing
+ the first two, and then quit, regardless of how much longer
+ "foo" might run or output. This optimization does not buy
+ much in most cases (it sometimes even costs a little),
+ but when it does help it can help big. The code is
+ all conditionally compiled based on the
+ EXPERIMENTAL_DASH_N_OPTIMIZATION symbol being #defined,
+ so it can be easily omitted if it causes problems.
+
+Tue Apr 14 12:25:06 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c (test_dollar_EOF, last_file_with_data_p):
+ test_dollar_EOF() was incorrectly returning a false (0)
+ when there were unprocessed files, none of which had any
+ data (either unopenable or zero-length). Created
+ last_file_with_data_p() to detect this situation, and
+ modified test_dollar_EOF() to make use of it.
+
+Thu Apr 2 23:02:18 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * compile.c (match_slash): match_slash() did not handle
+ [.coll.], [=equiv=], and [:class:] sequences within a
+ character class. Added snarf_char_class() [which is a
+ remote derivative of parse_char_class() from GNU ed-0.2]
+ to deal with the details, and altered match_slash()
+ to make use of it. Also created the trivial
+ add_then_next() to avoid clutter in snarf_char_class().
+
+Thu Apr 2 20:34:42 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c, sed.c, sed.h: There was a severe bug in
+ how the code handled "sed 5n a b" when "a" consists
+ of exactly five lines -- it behaved like "sed 5q a b"!
+
+ Rearranged where files get opened -- large scale
+ changes primarily involving main(), process_files(),
+ and read_pattern_space(), but also touching on several
+ other parts of execute.c. The read_pattern_space()
+ function became unwieldy and parts were split into
+ open_next_file(), closedown(), read_always_fail(),
+ read_mem_line(), and read_file_line(). The
+ at_end_of_file_p() function became obsolete and was
+ eliminated; test_dollar_EOF_p() was updated. A few
+ global and module-static variables were eliminated, and
+ "struct line" was extended; comments were added to the
+ "struct line" declaration to document some important
+ dependencies in it.
+
+ I undertook the reorganization with dread, but I
+ feel that the new organization is an improvement
+ well beyond just fixing the bug that inspired it.
+
+Thu Apr 2 01:16:25 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * execute.c (read_file_line, slow_getline): the fread()
+ buffering code gives insufficient feedback to a user
+ running sed with a tty input device, so I created
+ slow_getline() for reading from a tty device.
+ Additionally, EOF detection has been made a little more
+ sensitive to avoid requiring multiple EOFs to be entered
+ from a tty.
+
+ * configure.in: added isatty() check.
+
+Wed Apr 1 11:04:30 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * configure.in (CPPFLAGS, LDFLAGS, LIBS):
+ Set to appropriate values if large file support needs
+ explicit enabling. Code fragment taken from a 1997-10-25
+ patch to gawk by Paul Eggert <eggert@twinsun.com>
+
+Thu Aug 14 17:43:27 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * utils.c (ck_fclose): modified to ignore NULL parameter.
+
+Thu Aug 14 12:08:45 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * execute.c: tweaked execute_program() to eliminate
+ gratuitous "goto" usage.
+
+Thu Aug 14 11:30:04 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * compile.c: added case-insensitive modifier ('I') to
+ address and s/// regexps. The s/// case also accepts
+ the more popular 'i' modifier. (The address regexp
+ cannot use 'i' as a modifier, as that conflicts with
+ the use of the 'i'nsert command.)
+
+Thu Aug 14 09:29:06 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * compile.c: abstracted out match_slash() from the s///, y///,
+ and address-regexp special-case codes.
+
+ * execute.c: made dump_append_queue() use ck_fread() instead
+ of hand-rolled error checking.
+
+Mon Jul 28 10:50:41 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * sed.c, sed.h, execute.c: POSIX.2, section 4.55.7, says that
+ a newline must end *every* output line. But I think that
+ it is useful (when seding a binary file) to omit a trailing
+ newline if the input lacks one. Thus the addition of
+ POSIXLY_CORRECT behavior.
+
+ * execute.c: however, when seding multiple files my feeling
+ is that it makes sense to have each file but the last
+ behave as-if it ended in a newline. Modified read_pattern_space()
+ accordingly.
+
+ * utils.c: realized that add1_buffer(), for performance reasons,
+ shouldn't be calling memcpy() (indirectly via add_buffer()),
+ so rewrote it.
+
+Sat Jul 26 23:08:28 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * execute.c: attempted to make read_pattern_space more
+ efficient for the non-mmap() case.
+
+ * utils.c, utils.h, execute.c: new function ck_fread()
+ created and used.
+
+Sat Jul 26 20:22:14 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * execute.c, compile.c, sed.c: abstracted the mmap()
+ interface into map_file()/unmap_file() [sed.c], and
+ changed the ad-hoc code in compile_file() [compile.c]
+ and process_file() [execute.c] to make use of the new
+ interface.
+
+Sat Jul 26 19:45:46 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * execute.c, compile.c, configure.in: Check to see if mmap()
+ is available; if so make use of it on regular files.
+
+ * compile.c: compile_file() now closes the input file
+ when it is through!
+
+Sun Jul 20 23:57:02 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * compile.c: modified parsing to permit whitespace in more
+ places where it makes sense;
+ added backslash escaping to the y/// command, per POSIX.
+
+ * execute.c: Merged append_pattern_space() into read_pattern_space();
+ moved body of 's' command to new function do_subst();
+ moved body of 'l' command to new function do_list();
+ changed output of 'l' command to conform to POSIX.2;
+ made line handling conform to POSIX; added output_line() function;
+ redesigned append-space algorithm; added append_queue structure and
+ the next_append_slot() and dump_append_queue() functions.
+
+ * sed.h: moved the definition of what is now struct subst
+ outside of the definition of struct sed_cmd.
+
+Sat Jul 19 16:29:09 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * sed.c, execute.c, sed.h, Makefile.am: Separated out the
+ pieces dealing with executing the program from the top-level
+ parameter parsing and control.
+
+Sat Jul 19 01:16:35 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * sed.c, compile.c, sed.h, Makefile.am: separate out the
+ pieces dealing with compiling the program from the pieces
+ dealing with interpreting the result.
+
+ * compile.c: add functions in_nonblank() and in_integer(),
+ and change interface to compile_address() with an eye
+ to making code clearer.
+
+Fri Jul 18 13:35:50 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * utils.c: attempt at a quasi-unification of the
+ STDC and traditional C approaches to panic().
+
+ * sed.c: eliminate some gratuitous bit twiddling.
+ (Using flag bits can be a useful technique, but
+ this code is cleaner without them.)
+
+ * sed.c: place mutually exclusive members of struct addr
+ within a union, mainly to document the exclusivity;
+ eliminate unused structure members from struct fp_list;
+ eliminate unnecessary module-global variables;
+ remove some #if 0 code that is too odd to keep;
+ allegedly simplified the 'l' case of execute_program();
+ allegedly simplified inchar();
+ localized some static variables;
+ renamed some variables to better document their purpose;
+ removed some goto-s rendered obsolete by other changes.
+
+Thu Jul 17 15:30:44 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * utils.c, utils.h, sed.c: added and made use of
+ ck_free() function.
+
+ * utils.c, utils.h, sed.c: changed all the *_buffer()
+ functions to take/return an incomplete type
+ "struct buffer *" instead of using VOID *.
+
+ * utils.c, utils.h, sed.c: renamed "finish_buffer()"
+ to "free_buffer()", on the premise that the new
+ name better describes the function's purpose.
+
+Wed Jul 16 13:52:14 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * utils.c, utils.h, sed.c: added and made use of
+ ck_memdup() function.
+
+ * sed.c: protected a call to add1_buffer() in
+ compile_program() which could have tried to
+ push an EOF if a a/i/c command ended with
+ a '\', EOF sequence.
+
+ * utils.c: added sanity check to add1_buffer() so that
+ EOF will not be added to the buffer.
+
+Wed Jul 16 03:56:26 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * configure.in, compat.h, compat.c: added memchr.
+
+ * sed.c: got rid of arbitrary NUM_FPS limit;
+ made global functions and variables "static" where appropriate;
+ make various cosmetic changes, hopefully improving readability;
+ simplified some redundant predicates;
+ simplified some code, but nothing fundamental (yet?).
+
+Wed Jul 16 00:24:54 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * alloca.c, getopt.c, getopt.h, getopt1.c: updated from
+ versions in textutils-1.22.
+
+ * Makefile.in, Makefile.am, configure.in: put in automake support.
+
+ * basicdefs.h, compat.h, compat.c [, sed.c, utils.c]: took out
+ some very ugly compatibility #ifdefs and packaged into one
+ place.
+
+ * sed.c, utils.c: some gratuitous formatting changes.
+
+ * utils.c: changed datatype of utils_id_s in order to
+ eliminate arbitrary array size.
+
+Sun Jul 13 17:00:26 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * sed.c, utils.c, utils.h: de-linting oriented cleanup.
+
+Sun Jul 13 00:46:48 PDT 1997 Ken Pizzini <ken@gnu.org>
+
+ * sed.c: fixed bug which caused SEGV for files missing a
+ final newline. Corrected calls to regnexec to pass the
+ proper parameters, in the proper order.
+
+Sat Dec 30 20:16:59 1995 Tom Lord <lord@beehive>
+
+ *** Version 3.00 released
+
+ * sed.c: Use posix entry points to regexp functions.
+ Fix enough bugs to pass the test-suite.
+
+....... Jason Molenda <crash@cygnus.com>
+
+ * testsuite/: trippy test suite.
+
+
+Wed May 11 07:46:24 1994 Chip Salzenberg (chip@fin.uucp)
+
+ *** Version 2.05 released
+
+ * sed.c (compile_address): Recognize numeric addresses.
+ Fixes typo made during installation of "~" feature.
+
+Sat Apr 30 17:17:38 1994 Tom Lord (lord@x1.cygnus.com)
+
+ *** Version 2.04 released
+
+ * sed.c: applied a patch from
+ From: kap1@tao.cpe.uchicago.edu (Dietrich Kappe)
+
+ Dietrich writes:
+
+ As my contribution to the creeping feature creature in sed,
+ here is a new type of address. The address has form n~m,
+ which means "the line number is equal to n modulo m." The
+ modifications to sed are trivial, and the general
+ usefulness of this address should be obvious. If m is 0 or
+ missing, 1 is used in its place (could be a bug or a
+ feature :-).
+
+Sat Apr 30 17:17:38 1994 Tom Lord (lord@x1.cygnus.com)
+
+ * rx.c (solve_destination): protect `solution' more carefully.
+ This is a cleanup of a patch from Kevin Buettner
+ (kev@cujo.geg.mot.com).
+
+Sat Apr 30 17:17:38 1994 Tom Lord (lord@x1.cygnus.com)
+
+ * rx.c: make translation tables unsigned chars
+
+ * sed.c (main): Compile accumulated -e commands as
+ soon as a -f command comes along. This ensures that
+ the commands are executed in the right order.
+
+Mon Oct 25 14:41:47 1993 Tom Lord (lord@rtl.cygnus.com)
+
+ * sed.c (execute_program): 'w' flushes the buffer after it
+ writes -- diagnosed by doug@research.att.com. 'r' and 'w' to
+ the same file is now supported -- hopefully even in a way that
+ satisfies Posix (it now behaves differently from some
+ /bin/sed's and the spec is hard to read so i'm not sure).
+
+ Also, 'r' of a non-existent file is now permitted.
+
+Mon Oct 11 21:06:10 1993 Tom Lord (lord@cygnus.com)
+
+ * sed.c (execute_program): remember that 'b' and 't' are more
+ like longjmp than goto. Patch from tom@basil.icce.rug.nl (Tom
+ R.Hageman)
+
+ * rx.c: patch from From: fin!chip@rutgers.edu (Chip
+ Salzenberg) to get rid of compiler warnings.
+
+
+Sat Aug 7 01:04:59 1993 Tom Lord (lord@unix7.andrew.cmu.edu)
+
+ *** Version 2.03 released
+
+ * sed.c (compile_regex): report error messages for bogus
+ regexps.
+
+ SEE ALSO: ChangeLog.rx
+
+
+Wed Jul 21 00:28:03 1993 Tom Lord (lord@unix8.andrew.cmu.edu)
+
+ * alloca.c: upgraded to a more recent version
+
+ * rx.c (re_search_2): prefer matches with longer
+ subexpressions to those with shorter ones, giving precedence
+ to low numbered subexpressions.
+
+ * rx.c (re_compile): don't free `params' if its null.
+
+Fri Jul 16 01:12:08 1993 Tom Lord (lord@unix8.andrew.cmu.edu)
+
+ * rx.[ch], sed.c: rx replaces regex.
+
+
+
+Thu May 27 11:13:03 1993 Tom Lord (lord@unix3.andrew.cmu.edu)
+
+ * sed.c (execute_program, match_addr): caught more cases
+ that need to be sensitive to a missing \n at EOF.
+
+Fri May 21 00:39:22 1993 Tom Lord (lord@unix8.andrew.cmu.edu)
+
+ * sed.c (execute_program): apply gaumondp's patch
+ to fix '\xabcxs/foo/bar/'.
+
+ * sed.c (execute_program):
+ If a second address is a regexp, never match it on the
+ same line as the first address.
+
+ * sed.c (compile_regexp):
+ Numeric ranges x,y s.t. y < x are now treated as x,x.
+ There was a bug in that they were being handled like x,x+1.
+
+ * sed.c (execute_program, read_pattern_space,
+ append_pattern_space) don't add newlines to lines
+ that don't have them.
+
+Wed May 19 13:34:45 1993 Tom Lord (lord@unix9.andrew.cmu.edu)
+
+ * sed.c (compile_program): grok \\n in comments.
+
+Mon May 17 16:34:50 1993 Tom Lord (lord@unix9.andrew.cmu.edu)
+
+ * alloca.c: new (standard) file
+
+ * configure.in: AC_CONSTified
+
+ * sed.c (compile_program): properly diagnose the error of
+ a missing command (e.g. sed /x/). (thanks gaumondp)
+
+ * sed.c (compile_regexp): handle character classes correctly.
+ Thanks gaumondp@ERE.UMontreal.CA
+ and schwab@issan.informatik.uni-dortmund.de.
+
+Thu May 6 12:37:18 1993 Tom Lord (lord@unix10.andrew.cmu.edu)
+
+ * sed.c (compile_filename, execute_program): don't use
+ `access' or `/dev/null'.
+
+ * sed.c (execute_program): 'N' at EOF should delete the pat buf.
+
+ * sed.c (compile_filename): truncate, don't append files
+ being opened for `w' or `s///w'
+
+ * sed.c (execute_program): -n switch shouldn't affect `i' or `c'.
+
+ * sed.c (compile_program): don't compile unescaped newlines
+ into the substitution string of an `s' command (they are an error).
+
+ * sed.c (compile_regex): correctly skip over character
+ sets that contain `]'.
+
+ * sed.c (execute_program): patch from gaumondp
+ Correctly handle empty-string matches in the case of an `s'
+ command with a repeat count.
+
+ * sed.c (compile_program): patch from gaumondp@ere.UMontreal.ca.
+ Don't consume characters after the label of a `b', `t' or `:' command.
+
+ * sed.c (compile_program): unmatched open braces are an error.
+
+ * sed.c (compile_file): when consuming an initial comment,
+ count lines correctly.
+
+Wed Nov 18 02:10:58 1992 Tom Lord (lord@unix2.andrew.cmu.edu)
+
+ * sed.c (execute_program): Made s///p print even if -n was
+ specified.
+
+ * sed.c (compile_string): Changed the type of this function to
+ fix a compile warning.
+
+Wed Nov 4 17:15:34 1992 Tom Lord (lord@unix7.andrew.cmu.edu)
+
+ * sed.c (main): Initialize the hold area to contain "\n"
+ instead of "". In execute_program, all lines are expected
+ to be newline terminated. Also, if H is the first command
+ in the script, the result is a pattern buffer that begins
+ with a blank line. Thanks to pinard@iro.umontreal.ca
+ (Francois Pinard) for pointing out this and many other bugs.
+
+ * sed.c (execute_program): Fixed a case of `D' command.
+ Thanks Chris Weber <weber@bucknell.edu>
+
+ * sed.c: added new tests of no_default_output to make -n work.
+ Thanks Andrew Herbert <andrew@werple.apana.org.au>
+
+ * sed.c, configure.in,Makefile.in: autoconfed bcopy and const.
+ Thanks "J.T. Conklin" <jtc@gain.com>
+
+ * sed.c: made prog_cur, prog_start, and prog_end unsigned so
+ that users could write `sed -e s/ÿ/foo/g'.
+
+Tue Oct 13 00:04:05 1992 Tom Lord (lord@unix3.andrew.cmu.edu)
+
+ * sed.c (execute_program): fixed the cycling behavior of 'D'
+
+ * sed.c: integrated patch that closes files
+
+ * sed.c: changed regexp syntax
+
+Fri May 22 15:11:12 1992 Tom Lord (lord at moriarty.bh.andrew.cmu.edu)
+
+ * regex.c: this is not my change, but a pointer to the fact
+ that karl@gnu fixed some regexp bugs that were plaguing sed.
+
+Thu Apr 30 13:02:21 1992 Tom Lord (lord at unix3.andrew.cmu.edu)
+
+ * sed.c (compile_program, execute_program)
+ subprograms are now compiled with an explicit continuation ;)
+ return_v and return_i in struct vector. execute_program
+ no longer recurses to execute subprograms (case '{') and now
+ understands a return instruction (case '{').
+
+Tue Apr 28 17:13:04 1992 Tom Lord (lord at unix7.andrew.cmu.edu)
+
+ * sed.c (compile_address) added \?regexp? syntax for addresses.
+
+ * sed.c (main) added {} intervals to the obscure regexp
+ syntax.
+
+ * sed.c (compile_program) after calling compile_address,
+ normalize numeric addresses (make a2.addr_number > a1.addr_number).
+ This is necessary because line numbers must match exactly,
+ but sed does not try to match a2 until after a1 has matched,
+ yet a1,a2 where a2 <= a1 is defined to be equivalent to
+ a1,a1+1
+
+Sat Feb 29 10:55:54 1992 David J. MacKenzie (djm@nutrimat)
+
+ * sed.c (usage): Document long options as starting with `--'.
+
+Mon Dec 9 23:56:40 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * sed.c: Include sys/types.h, for new regex.h.
+
+Tue Nov 5 02:16:01 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * utils.c: Change NO_VFPRINTF to VPRINTF_MISSING, for
+ compatibility with autoconf.
+
+Mon Sep 2 22:02:40 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * sed.c (compile_regex): Treat \ as a normal character when in
+ a char class.
+
+Thu Aug 8 00:15:33 1991 David J. MacKenzie (djm at bleen)
+
+ * Version 1.08.
+
+ * sed.c (compile_filename): If reading a file fails, read
+ /dev/null instead. It's what Unix and POSIX do, effectively.
+
+ * sed.c (compile_regex): The 'slash' character doesn't
+ terminate the regex if it's in a character class.
+
+ * sed.c (main): If given no args, or bad option, print usage
+ message.
+ (usage): New function.
+
+ * sed.c (execute_program): Amount written for 'P' command was
+ wrong. From stephend@ksr.com (Stephen Davis).
+
+Wed Aug 7 16:51:14 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * sed.c (append_pattern_space): Check for buffer full before
+ instead of after writing to buffer. Don't need to test for
+ EOF initially anymore, due to the next change.
+ (execute_program): For 'n' and 'N' commands, if eof is reached
+ in input, quit the script like Unix sed does.
+ Fix memory allocation problems for 'a' and 'r' commands.
+ (compile_program): Fix off by one error in processing comments.
+ All of the above are from Tapani Tarvainen, tarvaine@tukki.jyu.fi.
+
+ * sed.c (setup_jump): Use isblank instead of testing for ' '
+ or '\t', for POSIX locales.
+
+ * utils.c (ck_strdup): Renamed from strdup.
+ * sed.c: Change callers.
+
+ * sed.c, utils.c: Clean up declarations and includes to get
+ rid of compiler warnings.
+
+ * sed.c (main): Add long-named options. Don't complain if -n
+ is given twice.
+
+Fri Aug 2 12:33:16 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * configure: Support +srcdir arg. Create config.status and
+ remove it and Makefile if interrupted while creating them.
+ * Makefile.in: Change DESTDIR to prefix.
+
+Mon Jul 15 13:07:39 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * sed.c (main): Add -V option to print version number.
+ (USAGE): Mention -V.
+
+Mon Jul 8 01:42:22 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * sed.c: Define bcopy in terms of memcpy if STDC_HEADERS as
+ well as if USG.
+ (compile_filename): Don't glob filename (for 'r' and 'w'
+ commands). Unix sed doesn't do it and it's not very useful,
+ since it can only match 0 or 1 files.
+ (execute_program): Change '\a' to 007 since some compilers
+ don't recognize \a.
+ * utils.c: New file; code moved from sed.c.
+ * Replace Makefile with Makefile.in and configure.
+ Update README.
+
+Tue Mar 26 13:00:48 EST 1991 Jay Fenlason (hack@gnu.ai.mit.edu)
+
+ * sed.c (match_address) Added a trivial cast for portability.
+
+Mon Feb 25 13:23:29 EST 1991 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c Changed 's' command to work with latest version of regex()
+ routines, which mysteriously changed somewhere in there. . .
+ A one-line patch from David Eckelkamp (eckelkamp@mcc.com).
+
+ Initialize the fastmap in the hopes that it'll make sed faster.
+
+Thu Feb 21 13:42:27 EST 1991 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c Change panic to compile with other __STDC__ compilers.
+
+Wed Jan 30 10:46:38 EST 1991 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c Changed version number. Made new release.
+
+Tue Nov 27 15:34:51 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c (setup_jump) Don't blow chunks if there isn't a label
+ after a b or t command.
+
+ (main) Don't panic if a branch command doesn't have
+ a label to branch to.
+
+ (main) Collect all the -e arguments together and parse them
+ all at once. This way, -e { -e mumble -e } will work.
+
+ All these small patches from David Schmidt (davids@isc-br.isc-br.com)
+
+Tue Sep 11 12:51:37 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c Changed some function forward declarations to use VOID *
+ instead of char *
+
+Mon Jul 16 11:12:54 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c (ck_malloc) Use malloc(1) instead of malloc(0) if given
+ a request for zero bytes.
+
+Tue Jun 5 02:05:37 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * sed.c: Remove excess newlines from calls to panic.
+ Reformat some comments to fit in 79 columns.
+ Base whether to use void * on __STDC__, not __GNU__.
+ (main): Add missing arg when printing usage message.
+ Print usage if given invalid arg.
+ (panic) [__STDC__]: Add missing ", ...".
+ (compile_filename): Print correct error message if glob_filename
+ returns NULL.
+
+Thu Apr 5 21:41:12 1990 Jim Kingdon (kingdon at pogo.ai.mit.edu)
+
+ * sed.c (execute_program, case 'r'): When need to realloc append.text,
+ multiply append.alloc by 2 instead of adding
+ cur_cmd->x.cmd_txt.text_len.
+
+Tue Mar 6 15:55:35 EST 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * sed.c (compile_regex) Allocate 10 bytes extra space needed by
+ re_compile_pattern.
+
+Sun Feb 25 16:32:10 1990 Jim Kingdon (kingdon at pogo.ai.mit.edu)
+
+ * sed.c (execute_program, case 'l'): Print \00 instead of \0.
+ Print backslash as \\ not \.
+ Print \xx instead of /xx.
+
+Thu Feb 1 14:02:28 EST 1990 hack@wookumz
+
+ * sed.c (memchr) Use () inside inner loop so it will work correctly.
+ A two character patch from Robert A Bruce (rab@allspice.berkeley.edu)
+
+Wed Sep 27 18:47:39 EDT 1989 hack@ai.mit.edu
+
+ * sed.c (compile_regex) New function. When compiling regex,
+ turn ^ into \` and $ into \' so that they won't match on embedded
+ newlines. UN*X pattern matching is a crock.
+ (compile_program, compile_address) call compile_regex.
+
+Mon Sep 18 10:15:32 EDT 1989 hack@ai.mit.edu
+
+ * sed.c (compile_program): define translate as unsigned char * so
+ that y command will work on non-ascii characters.
+
+ Changed version number to 1.06.
+
+Thu Sep 14 15:57:08 EDT 1989 hack@ai.mit.edu
+
+ * sed.c (compile_program) Let programs use ; to terminate } as
+ well as newline.
+
+ (read_file) Print an error msg to stderr if it can't open an
+ input file.
+
+Thu Mar 23 18:04:46 1989 Randall Smith (randy at apple-gunkies.ai.mit.edu)
+
+ * Makefile, sed.c: Added new copyright notice.
+
+ * Makefile: Make distributions which follow the symlinks.
+
+hack@ai.mit.edu
+
+ 1.05 Fixed error in 'r' (now does things in the right order)
+
+ 1.04 Fixed s/re/rep/[number]
+
+ 1.03 Fixes from Mike Haertel for regexps that match the
+ empty string, and for Ritchie stdio (non-sticky EOF)
+
+ 1.02 Fixed 't', 'b', ':' to trim leading spaces and tabs
+ Fixed \\ in replacement of 's' command
+ Added comments
+
+ 1.01 Added s/re/rep/[digits]
+ added #n as first line of script
+ added filename globbing
+ added 'l' command
+ All in the name of POSIX
+
+ 1.00 Began (thinking about) distributing this file
+
+Local Variables:
+mode: indented-text
+left-margin: 8
+version-control: never
+End:
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..54caf7c
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,229 @@
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+ This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+ These are generic installation instructions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. (Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.)
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You only need
+`configure.ac' if you want to change it or regenerate `configure' using
+a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system. If you're
+ using `csh' on an old version of System V, you might need to type
+ `sh ./configure' instead to prevent `csh' from trying to execute
+ `configure' itself.
+
+ Running `configure' takes a while. While running, it prints some
+ messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. Run `./configure --help'
+for details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ If you have to use a `make' that does not support the `VPATH'
+variable, you have to compile the package for one architecture at a
+time in the source code directory. After you have installed the
+package for one architecture, use `make distclean' before reconfiguring
+for another architecture.
+
+Installation Names
+==================
+
+ By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc. You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=PATH' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+ There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on. Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the `--target=TYPE' option to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+ Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+will cause the specified gcc to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+`configure' Invocation
+======================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
+
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..f0aaa7f
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,30 @@
+## Process this file with automake to produce Makefile.in
+
+# Automake requirements: gnits strictness, Automake 1.8 or newer; aclocal also searches config/ for macros
+AUTOMAKE_OPTIONS = gnits 1.8
+ACLOCAL_AMFLAGS = -I config
+
+PACKAGE = sed
+
+SUBDIRS = intl lib po sed doc testsuite
+
+noinst_DATA = bootstrap.sh
+noinst_HEADERS = basicdefs.h
+
+EXTRA_DIST = BUGS THANKS COPYING.DOC README.boot bootstrap.sh \
+ config/texi2dvi config/help2man
+# Delegate `html' to doc/; $(MAKE) rather than a literal `make' passes this make's flags and jobserver down
+html:
+ cd doc && $(MAKE) html
+# Fetch the regex sources into lib/ with wget from glibc's libc/posix on cvsweb; regex.h is saved as regex_.h
+update-regex:
+ cd lib && \
+ HOST=sources.redhat.com && \
+ BASEURL="http://$$HOST/cgi-bin/cvsweb.cgi/~checkout~/libc/posix" && \
+ QUERY='cvsroot=glibc&content-type=text/plain' && \
+ wget -O regcomp.c "$$BASEURL/regcomp.c?$$QUERY" && \
+ wget -O regexec.c "$$BASEURL/regexec.c?$$QUERY" && \
+ wget -O regex.c "$$BASEURL/regex.c?$$QUERY" && \
+ wget -O regex_.h "$$BASEURL/regex.h?$$QUERY" && \
+ wget -O regex_internal.c "$$BASEURL/regex_internal.c?$$QUERY" && \
+ wget -O regex_internal.h "$$BASEURL/regex_internal.h?$$QUERY"
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..68329bb
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,426 @@
+Sed 4.1.3
+
+* added a note to BUGS and the manual about changed interpretation
+ of `s|abc\|def||'.
+
+* fixed `make check' in non-English locales.
+
+----------------------------------------------------------------------------
+Sed 4.1.2
+
+* fix bug in 'y' command in multi-byte character sets
+
+* fix severe bug in parsing of ranges with an embedded open bracket
+
+* fix off-by-one error when printing a "bad command" error
+
+----------------------------------------------------------------------------
+Sed 4.1.1
+
+* preserve permissions of in-place edited files
+
+* yield an error when running -i on terminals or other non regular files
+
+* do not interpret - as stdin when running in in-place editing mode
+
+* fix bug that prevented 's' command modifiers from working
+
+----------------------------------------------------------------------------
+Sed 4.1
+
+* // matches the last regular expression even in POSIXLY_CORRECT mode.
+
+* change the way we treat lines which are not terminated by a newline.
+Such lines are printed without the terminating newline (as before)
+but as soon as more text is sent to the same output stream, the
+missing newline is printed, so that the two lines don't concatenate.
+The behavior is now independent from POSIXLY_CORRECT because POSIX
+actually has undefined behavior in this case, and the new implementation
+arguably gives the ``least expected surprise''. Thanks to Stepan
+Kasal for the implementation.
+
+* documentation improvements, with updated references to the POSIX.2
+specification
+
+* error messages on I/O errors are better, and -i does not leave temporary
+files around (e.g. when running ``sed -i'' on a directory).
+
+* escapes are accepted in the y command (for example: y/o/\n/ transforms
+o's into newlines)
+
+* -i option tries to set the owner and group to the same as the input file
+
+* `L' command is deprecated and will be removed in sed 4.2.
+
+* line number addresses are processed differently -- this is supposedly
+conformant to POSIX and surely more idiot-proof. Line number addresses
+are not affected by jumping around them: they are activated and
+deactivated exactly where the script says, while previously
+ 5,8b
+ 1,5d
+would actually delete lines 1,2,3,4 and 9 (!).
+
+* multibyte characters are taken into consideration to compute the
+operands of s and y, provided you set LC_CTYPE correctly. They are
+also considered by \l, \L, \u, \U, \E.
+
+* [\n] matches either backslash or 'n' when POSIXLY_CORRECT.
+
+* new option --posix, disables all GNU extensions. POSIXLY_CORRECT only
+disables GNU extensions that violate the POSIX standard.
+
+* options -h and -V are not supported anymore, use --help and --version.
+
+* removed documentation for \s and \S which worked incorrectly
+
+* restored correct behavior for \w and \W: match [[:alnum:]_] and
+[^[:alnum:]_] (they used to match [[:alpha:]_] and [^[:alpha:]_])
+
+* the special address 0 can only be used in 0,/RE/ or 0~STEP addresses;
+other cases give an error (you are hindering portability for no reason
+if specifying 0,N and you are giving a dead command if specifying 0
+alone).
+
+* when a \ is used to escape the character that would terminate an operand
+of the s or y commands, the backslash is removed before the regex is
+compiled. This is left undefined by POSIX; this behavior makes `s+x\+++g'
+remove occurrences of `x+', consistently with `s/x\///g'. (However, if
+you enjoy yourself trying `s*x\***g', sed will use the `x*' regex, and you
+won't be able to pass down `x\*' while using * as the delimiter; ideas on
+how to simplify the parser in this respect, and/or gain more coherent
+semantics, are welcome).
+
+
+----------------------------------------------------------------------------
+Sed 4.0.9
+
+* 0 address behaves correctly in single-file (-i and -s) mode.
+
+* documentation improvements.
+
+* tested with many hosts and compilers.
+
+* updated regex matcher from upstream, with many bugfixes and speedups.
+
+* the `N' command's feature that is detailed in the BUGS file was disabled
+by the first change below in sed 4.0.8. The behavior has now been
+restored, and is only enabled if POSIXLY_CORRECT behavior is not
+requested.
+
+----------------------------------------------------------------------------
+Sed 4.0.8
+
+* fix `sed n' printing the last line twice.
+
+* fix incorrect error message for invalid character classes.
+
+* fix segmentation violation with repeated empty subexpressions.
+
+* fix incorrect parsing of ^ after escaped (.
+
+* more comprehensive test suite (and with many expected failures...)
+
+----------------------------------------------------------------------------
+Sed 4.0.7
+
+* VPATH builds working on non-glibc machines
+
+* fixed bug in s///Np: was printing even if fewer than N matches were
+found.
+
+* fixed infinite loop on s///N when LHS matched a null string and
+there were not enough matches in pattern space
+
+* behavior of s///N is consistent with s///g when the LHS can match
+a null string (and the infinite loop did not happen :-)
+
+* updated some translations
+
+----------------------------------------------------------------------------
+Sed 4.0.6
+
+* added parameter to `v' for the version of sed that is expected.
+
+* configure switch --without-included-regex to use the system regex matcher
+
+* fix for -i option under Cygwin
+
+----------------------------------------------------------------------------
+Sed 4.0.5
+
+* portability fixes
+
+* improvements to some error messages (e.g. y/abc/defg/ incorrectly said
+`excess characters after command' instead of `y arguments have different
+lengths')
+
+* `a', `i', `l', `L', `r' accept two addresses except in POSIXLY_CORRECT
+mode. Only `q' and `Q' do not accept two addresses in standard (GNU) mode.
+
+----------------------------------------------------------------------------
+Sed 4.0.4
+
+* documentation fixes
+
+* update regex matcher
+
+----------------------------------------------------------------------------
+Sed 4.0.3
+
+* fix packaging problem (two missing translation catalogs)
+
+----------------------------------------------------------------------------
+Sed 4.0.2
+
+* more translations
+
+* fix build problems (vpath builds and bootstrap builds)
+
+----------------------------------------------------------------------------
+Sed 4.0.1
+
+* Remove last vestiges of super-sed
+
+* man page automatically built
+
+* more translations provided
+
+* portability improvements
+
+----------------------------------------------------------------------------
+Sed 4.0
+
+* Update regex matcher
+
+----------------------------------------------------------------------------
+Sed 3.96
+
+* `y' command supports multibyte character sets
+
+* Update regex matcher
+
+----------------------------------------------------------------------------
+Sed 3.95
+
+* `R' command reads a single line from a file.
+
+* CR-LF pairs are always ignored under Windows, even if (under Cygwin)
+a disk is mounted as binary.
+
+* More attention to errors on stdout
+
+* New `W' command to write first line of pattern space to a file
+
+* Can customize line wrap width on single `l' commands
+
+* `L' command formats and reflows paragraphs like `fmt' does.
+
+* The test suite makefiles are better organized (this change is
+transparent however).
+
+* Compiles and bootstraps out-of-the-box under MinGW32 and Cygwin.
+
+* Optimizes cases when pattern space is truncated at its start or at
+its end by `D' or by a substitution command with an empty RHS.
+For example scripts like this,
+
+ seq 1 10000 | tr \\n \ | ./sed ':a; s/^[0-9][0-9]* //; ta'
+
+whose behavior was quadratic with previous versions of sed, have
+now linear behavior.
+
+* New command `e' to pipe the output of a command into the output
+of sed.
+
+* New option `e' to pass the output of the `s' command through the
+Bourne shell and get the result into pattern space.
+
+* Switched to obstacks in the parser -- fewer memory-related bugs
+(there were none AFAIK but you never know) and less memory usage.
+
+* New option -i, to support in-place editing a la Perl. Usually one
+had to use ed or, for more complex tasks, resort to Perl; this is
+not necessary anymore.
+
+* Dumped buffering code. The performance loss is 10%, but it caused
+bugs in systems with CRLF termination. The current solution is
+not definitive, though.
+
+* Bug fix: Made the behavior of s/A*/x/g (i.e. `s' command with a
+possibly empty LHS) more consistent:
+
+ pattern GNU sed 3.x GNU sed 4.x
+ B xBx xBx
+ BC xBxCx xBxCx
+ BAC xBxxCx xBxCx
+ BAAC xBxxCx xBxCx
+
+* Bug fix: the // empty regular expression now refers to the last
+regular expression that was matched, rather than to the last
+regular expression that was compiled. This richer behavior seems
+to be the correct one (albeit neither one is POSIXLY_CORRECT).
+
+* Check for invalid backreferences in the RHS of the `s' command
+(e.g. s/1234/\1/)
+
+* Support for \[lLuUE] in the RHS of the `s' command like in Perl.
+
+* New regular expression matcher
+
+* Bug fix: if a file was redirected to be stdin, sed did not consume
+it. So
+ (sed d; sed G) < TESTFILE
+
+double-spaced TESTFILE, while the equivalent `useless use of cat'
+ cat TESTFILE | (sed d; sed G)
+
+printed nothing (which is the correct behavior). A test for this
+bug was added to the test suite.
+
+* The documentation is now much better, with a few examples provided,
+and a thorough description of regular expressions. The manual often
+refers to "GNU extensions", but if they are described here they are
+specific to this version.
+
+* Documented command-line option:
+ -r, --regexp-extended
+ Use extended regexps -- e.g. (abc+) instead of \(abc\+\)
+
+* Added feature to the `w' command and to the `w' option of the `s'
+command: if the file name is /dev/stderr, it means the standard
+error (inspired by awk); and similarly for /dev/stdout. This is
+disabled if POSIXLY_CORRECT is set.
+
+* Added `m' and `M' modifiers to `s' command for multi-line
+matching (Perl-style); in addresses, only `M' works.
+
+* Added `Q' command for `silent quit'; added ability to pass
+an exit code from a sed script to the caller.
+
+* Added `T' command for `branch if failed'.
+
+* Added `v' command, which is a do-nothing intended to fail on
+seds that do not support GNU sed 4.0's extensions.
+
+----------------------------------------------------------------------------
+Sed 3.02.80
+
+* Started new version nomenclature for pre-3.03 releases. (I'm being
+pessimistic in assuming that .90 won't give me enough breathing room.)
+
+* Bug fixes: the regncomp()/regnexec() interfaces proved to be inadequate to
+properly handle expressions such as "s/\</#/g". Re-abstracted the regex
+code in the sed/ tree, and now use the re_search_2() interface to the GNU
+regex routines. This change also fixed a bug where /./ did not match the
+NUL character. Had the glibc folk fix a bug in lib/regex.c where
+'s/0*\([0-9][0-9]\)/X\1X/' failed to match on input "002".
+
+* Added new command-line options:
+ -u, --unbuffered
+ Do not attempt to read-ahead more than required; do not buffer stdout.
+ -l N, --line-length=N
+ Specify the desired line-wrap length for the `l' command.
+ A length of "0" means "never wrap".
+
+* New internationalization translations added: fr ru de it el sk pt_BR sv
+(plus nl from 3.02a).
+
+* The s/// command now understands the following escapes
+(in both halves):
+ \a an "alert" (BEL)
+ \f a form-feed
+ \n a newline
+ \r a carriage-return
+ \t a horizontal tab
+ \v a vertical tab
+ \oNNN a character with the octal value NNN
+ \dNNN a character with the decimal value NNN
+ \xNN a character with the hexadecimal value NN
+This behavior is disabled if POSIXLY_CORRECT is set, at least for the
+time being (until I can be convinced that this behavior does not violate
+the POSIX standard). (Incidentally, \b (backspace) was omitted because
+of the conflict with the existing "word boundary" meaning. \ooo octal
+format was omitted because of the conflict with backreference syntax.)
+
+* If POSIXLY_CORRECT is set, the empty RE // now is the null match
+instead of "repeat the last RE match". As far as I can tell
+this behavior is mandated by POSIX, but it would break too many
+legacy sed scripts to blithely change GNU sed's default behavior.
+
+----------------------------------------------------------------------------
+Sed 3.02a
+
+* Added internationalization support, and an initial (already out of date)
+set of Dutch message translations (both provided by Erick Branderhorst).
+
+* Added support for scripts like:
+ sed -e 1ifoo -e '$abar'
+(note no need for \ <newline> after a, i, and c commands).
+Also, conditionally (on NO_INPUT_INDENT) added
+experimental support for skipping leading whitespace on
+each {a,i,c} input line.
+
+* Added addressing of the form:
+ /foo/,+5 p (print from foo to 5th line following)
+ /foo/,~5 p (print from foo to next line whose line number is a multiple of 5)
+The first address of these can be any of the previously existing
+addressing types; the +N and ~N forms are only allowed as the
+second address of a range.
+
+* Added support for pseudo-address "0" as the first address in an
+address-range, simplifying scripts which happen to match the end
+address on the first line of input. For example, a script
+which deletes all lines from the beginning of the file to the
+first line which contains "foo" is now simply "sed 0,/foo/d",
+whereas before one had to go through contortions to deal with
+the possibility that "foo" might appear on the first line of
+the input.
+
+* Made NUL characters in regexps work "correctly" --- i.e., a NUL
+in a RE matches a NUL; it does not prematurely terminate the RE.
+(This only works in -f scripts, as the POSIX.1 exec*() interface
+only passes NUL-terminated strings, and so sed will only be able
+to see up to the first NUL in any -e scriptlet.)
+
+* Wherever a `;' is accepted as a command terminator, also allow a `}'
+or a `#' to appear. (This allows for less cluttered-looking scripts.)
+
+* Lots of internal changes that are only relevant to source junkies
+and development testing. Some of which might cause imperceptible
+performance improvements.
+
+----------------------------------------------------------------------------
+Sed 3.02
+
+* Fixed a bug in the parsing of character classes (e.g., /[[:space:]]/).
+Corrected an omission in djgpp/Makefile.am and an improper dependency
+in testsuite/Makefile.am.
+
+----------------------------------------------------------------------------
+Sed 3.01
+
+* This version of sed mainly contains bug fixes and portability
+enhancements, plus performance enhancements related to sed's handling
+of input files. Due to excess performance penalties, I have reverted
+(relative to 3.00) to using regex.c instead of the rx package for
+regular expression handling, at the expense of losing true POSIX.2
+BRE compatibility. However, performance related to regular expression
+handling *still* needs a fair bit of work.
+
+* One new feature has been added: regular expressions may be followed
+with an "I" directive ("i" was taken [the "i"nsert command]) to
+indicate that the regexp should be matched in a case-insensitive
+manner. Also of note are a new organization to the source code,
+new documentation, and a new maintainer.
+
+----------------------------------------------------------------------------
+Sed 3.0
+
+* This version of sed passes the new test-suite donated by
+Jason Molenda.
+
+* Overall performance has been improved in the following sense: Sed 3.0
+is often slightly slower than sed 2.05. On a few scripts, though, sed
+2.05 was so slow as to be nearly useless or to use up unreasonable
+amounts of memory. These problems have been fixed and in such cases,
+sed 3.0 should have acceptable performance.
diff --git a/README b/README
new file mode 100644
index 0000000..3da31c2
--- /dev/null
+++ b/README
@@ -0,0 +1,13 @@
+This is the GNU implementation of sed, the Unix stream editor.
+
+See the NEWS file for a brief summary and the ChangeLog for
+more detailed descriptions of changes.
+
+See the file INSTALL for generic compilation and installation
+instructions.
+
+See the file BUGS for instructions about reporting bugs.
+
+The file README.boot gives instructions for making a "bootstrap"
+version of sed on systems which lack any pre-existing and working
+version of sed.
diff --git a/README-alpha b/README-alpha
new file mode 100644
index 0000000..9235efe
--- /dev/null
+++ b/README-alpha
@@ -0,0 +1,8 @@
+This is an alpha version of GNU sed. Please try it on a wide
+range of scripts (especially configure scripts) and submit
+bug reports to bonzini@gnu.org.
+
+Thanks,
+
+Paolo Bonzini
+GNU sed maintainer
diff --git a/README.boot b/README.boot
new file mode 100644
index 0000000..fd2d1a0
--- /dev/null
+++ b/README.boot
@@ -0,0 +1,23 @@
+Because a working sed is a prerequisite for running the ``configure''
+script, I have provided the script ``bootstrap.sh'' which will attempt
+to build a version of sed adequate for running ``configure''. If it
+fails, edit the ``config.h'' file that was created according to the
+comments found therein, and then try running ``bootstrap.sh'' again.
+
+The bootstrap build is quite likely to babble on and on with
+various compiler warnings. You may want to tell bootstrap.sh
+how to invoke your compiler with warnings disabled. For example,
+with a Bourne-like shell and gcc one could use:
+ $ CC='gcc -w' sh bootstrap.sh
+or with a csh-like shell, one could try:
+ % env CC='gcc -w' sh bootstrap.sh
+
+Once you get a working version of sed, temporarily install sed/sed
+somewhere in your $PATH, and then really re-build the normal way
+(starting with ``sh configure''); the bootstrap version is almost
+certainly more crippled than it needs to be on your machine.
+
+I don't much care to hear about any bugs in ``bootstrap'' versions
+of sed beyond those which actually keep the ``bootstrap'' version from
+building, or sed's configure script from running properly. I am
+especially uninterested in compiler warnings from the bootstrap build.
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..a922b4f
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,46 @@
+Akim Demaille <akim@epita.fr>
+Alan Modra <alan@spri.levels.unisa.edu.au>
+Arnold Robbins <arnold@skeeve.com>
+Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
+Andrew Herbert <andrew@werple.apana.org.au>
+Bruno Haible <haible@ilog.fr>
+Chip Salzenberg <chip@fin.uucp>
+Chris Weber <weber@bucknell.edu>
+David Eckelkamp <eckelkamp@mcc.com>
+David J. MacKenzie <djm@nutrimat>
+David Schmidt <davids@isc-br.isc-br.com>
+Dietrich Kappe <kap1@tao.cpe.uchicago.edu>
+Doug McIlroy <doug@research.att.com>
+Eli Zaretskii <eliz@is.elta.co.il>
+Eric Pement <epement@jpusa.chi.il.us>
+Erick Branderhorst <Erick.Branderhorst@asml.nl>
+Francois Pinard <pinard@iro.umontreal.ca>
+Gaumond Pierre <gaumondp@ERE.UMontreal.CA>
+Greg Ubben <gsu@romulus.ncsc.mil>
+Isamu Hasegawa <isamu@yamato.ibm.com>
+J.T. Conklin <jtc@gain.com>
+Jakub Jelinek <jakub@redhat.com>
+Jason Molenda <crash@cygnus.com>
+Jim Meyering <meyering@ascend.com>
+Laurent Vogel <lvl@club-internet.fr>
+Karl Berry <karl@freefriends.org>
+Karl Heuer <kwzh@gnu.org>
+Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
+Kevin Buettner <kev@cujo.geg.mot.com>
+Mark Kettenis <kettenis@phys.uva.nl>
+Michael De La Rue <delarue@NTCCSC01WA.ntc.nokia.com>
+Michel de Ruiter <mdruiter@cs.vu.nl>
+Paul Eggert <eggert@twinsun.com>
+Robert A Bruce <rab@allspice.berkeley.edu>
+Ronnie Glasscock <Ronnie.N.Glasscock@bridge.bellsouth.com>
+Simon Taylor <simon@unisolve.com.au>
+Stepan Kasal <kasal@ucw.cz>
+Stephen Davis <stephend@ksr.com>
+Steve Ingram <si@maps-r-us.com>
+Tapani Tarvainen <tarvaine@tukki.jyu.fi>
+Timothy J Luoma <luomat@peak.org>
+Tom R.Hageman <tom@basil.icce.rug.nl>
+Vladimir Volovich <vvv@vvv.vsu.ru>
+Wichert Akkerman <wakkerma@debian.org>
+
+And the GNU translation teams.
diff --git a/basicdefs.h b/basicdefs.h
new file mode 100644
index 0000000..1c5c9c3
--- /dev/null
+++ b/basicdefs.h
@@ -0,0 +1,202 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1998, 1999, 2002, 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef BASICDEFS_H
+#define BASICDEFS_H
+
+#if defined(_AIX)
+#pragma alloca
+#else
+# if !defined(alloca) /* predefined by HP cc +Olibcalls */
+# ifdef __GNUC__
+# define alloca(size) __builtin_alloca(size)
+# else
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# if defined(__hpux)
+ void *alloca ();
+# else
+# if !defined(__OS2__) && !defined(WIN32)
+ char *alloca ();
+# else
+# include <malloc.h> /* OS/2 defines alloca in here */
+# endif
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#ifdef HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+#ifdef HAVE_WCTYPE_H
+# include <wctype.h>
+#endif
+
+
+#ifdef BOOTSTRAP
+# define false 0
+# define true 1
+# define bool unsigned
+# define __bool_true_false_are_defined 1
+#else
+# include <stdbool.h>
+#endif
+
+#if ENABLE_NLS
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+# define ngettext(sing, plur, n) ((n) == 1 ? (sing) : (plur))
+#endif
+#define _(String) gettext(String)
+
+#ifdef gettext_noop
+# define N_(String) gettext_noop(String)
+#else
+# define N_(String) (String)
+#endif
+
+/* type countT is used to keep track of line numbers, etc. */
+typedef unsigned long countT;
+
+/* Oftentimes casts are used as an ugly hack to silence warnings
+ * from the compiler. However, sometimes those warnings really
+ * do point to something worth avoiding. I define this
+ * dummy marker to make searching for them with a text editor
+ * much easier, in case I want to verify that they are all
+ * legitimate. It is defined in the way it is so that it is
+ * easy to disable all casts so that the compiler (or lint)
+ * can tell me potentially interesting things about what would
+ * happen to the code without the explicit casts.
+ */
+#ifdef LOUD_LINT
+# define CAST(x)
+#else
+# define CAST(x) (x)
+#endif
+
+
+/* Can the compiler grok function prototypes? */
+#if (defined __STDC__ && __STDC__-0) || defined __GNUC__ || defined __SUNPRO_C || __PROTOTYPES
+# define P_(s) s
+#else
+# define P_(s) ()
+#endif
+
+/* (VOID *) is the generic pointer type; some ancient compilers
+ don't know about (void *), and typically use (char *) instead.
+ VCAST() is used to cast to and from (VOID *)s --- but if the
+ compiler *does* support (void *) make this a no-op, so that
+ the compiler can detect if we omitted an essential function
+ declaration somewhere.
+ */
+#ifndef VOID
+# define VOID void
+# define VCAST(t)
+#else
+# define VCAST(t) (t)
+#endif
+
+/* some basic definitions to avoid undue promulgating of VCAST ugliness */
+#define MALLOC(n,t) (VCAST(t *)ck_malloc((n)*sizeof(t)))
+#define REALLOC(x,n,t) (VCAST(t *)ck_realloc(VCAST(VOID *)(x),(n)*sizeof(t)))
+#define MEMDUP(x,n,t) (VCAST(t *)ck_memdup(VCAST(VOID *)(x),(n)*sizeof(t)))
+#define FREE(x) (ck_free(VCAST(VOID *)(x))) /* (x): cast must cover the whole argument expression */
+#define MEMCPY(d,s,l) (memcpy(VCAST(VOID *)(d),VCAST(const VOID *)(s),l))
+#define MEMMOVE(d,s,l) (memmove(VCAST(VOID *)(d),VCAST(const VOID *)(s),l))
+#define OB_MALLOC(o,n,t) (VCAST(t *)obstack_alloc(o,(n)*sizeof(t)))
+
+#define obstack_chunk_alloc ck_malloc
+#define obstack_chunk_free ck_free
+
+
+#ifdef HAVE_MEMORY_H
+# include <memory.h>
+#endif
+
+#ifndef HAVE_MEMMOVE
+# ifndef memmove
+ /* ../lib/libsed.a provides a memmove() if the system doesn't.
+ Here is where we declare its return type; we don't prototype
+ it because that sometimes causes problems when we're running in
+ bootstrap mode on a system which really does support memmove(). */
+ extern VOID *memmove();
+# endif
+#endif
+
+#ifndef HAVE_MEMCPY
+# ifndef memcpy
+# define memcpy(d, s, n) memmove(d, s, n)
+# endif
+#endif
+
+#ifndef HAVE_STRERROR
+ extern char *strerror P_((int e));
+#endif
+
+
+/* handle misdesigned <ctype.h> macros (snarfed from lib/regex.c) */
+/* Jim Meyering writes:
+
+ "... Some ctype macros are valid only for character codes that
+ isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
+ using /bin/cc or gcc but without giving an ansi option). So, all
+ ctype uses should be through macros like ISPRINT... If
+ STDC_HEADERS is defined, then autoconf has verified that the ctype
+ macros don't need to be guarded with references to isascii. ...
+ Defining isascii to 1 should let any compiler worth its salt
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
+
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
+#else
+# define ISASCII(c) isascii(c)
+#endif
+
+#if defined isblank || defined HAVE_ISBLANK
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
+#else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#undef ISPRINT
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+
+#ifndef initialize_main
+# ifdef __EMX__
+# define initialize_main(argcp, argvp) \
+ { _response(argcp, argvp); _wildcard(argcp, argvp); }
+# else /* NOT __EMX__ */
+# define initialize_main(argcp, argvp)
+# endif
+#endif
+
+#endif /*!BASICDEFS_H*/
diff --git a/bootstrap.sh b/bootstrap.sh
new file mode 100755
index 0000000..1c5977b
--- /dev/null
+++ b/bootstrap.sh
@@ -0,0 +1,82 @@
+#! /bin/sh
+
+# edit this to taste; note that you can also override via the environment:
+case "$CC" in
+ "") CC=cc
+esac
+
+if test -f config.h; then :; else
+ echo "Creating basic config.h..."
+ cat >config.h <<'END_OF_CONFIG_H'
+/* A bootstrap version of config.h, for systems which can't
+ auto-configure due to a lack of a working sed. If you are on
+ a sufficiently odd machine you may need to hand-tweak this file.
+
+ Regardless, once you get a working version of sed you really should
+ re-build starting with a run of "configure", as the bootstrap
+ version is almost certainly more crippled than it needs to be on
+ your machine.
+*/
+
+#define PACKAGE "sed"
+#define VERSION "4.1.2-boot"
+#define SED_FEATURE_VERSION "4.1"
+#define BOOTSTRAP 1
+
+/* Define if your compiler/headers don't support const. */
+#undef const
+
+/* Undefine if headers have conflicting definition. */
+#define mbstate_t int
+
+/* Toggle if you encounter errors in lib/mkstemp.c. */
+#define HAVE_UNISTD_H
+#define HAVE_FCNTL_H
+#undef HAVE_SYS_FILE_H
+#undef HAVE_IO_H
+
+/* Undefine if <stdio.h> or <sys/types.h> has conflicting definition. */
+#define size_t unsigned
+#define ssize_t int
+
+/* If your antique compiler doesn't grok ``void *'', then #define VOID char */
+#undef VOID
+
+
+/* All other config.h.in options intentionally omitted. Report as a
+ bug if you need extra "#define"s in here. */
+END_OF_CONFIG_H
+fi
+
+# tell the user what we're doing from here on...
+set -x -e
+
+# the ``|| exit 1''s are for fail-stop; set -e doesn't work on some systems
+
+rm -f lib/*.o sed/*.o sed/sed
+cd lib || exit 1
+rm -f regex.h
+cp regex_.h regex.h
+${CC} -DHAVE_CONFIG_H -I.. -I. -c alloca.c
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getline.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getopt.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getopt1.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memchr.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memcmp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memmove.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c mkstemp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c strverscmp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c obstack.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c regex.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c strerror.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c utils.c || exit 1
+
+cd ../sed || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c sed.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c fmt.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c compile.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c execute.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c mbcs.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c regexp.c || exit 1
+
+${CC} -o sed *.o ../lib/*.o || exit 1
diff --git a/bootstrap.sh.in b/bootstrap.sh.in
new file mode 100755
index 0000000..82b2432
--- /dev/null
+++ b/bootstrap.sh.in
@@ -0,0 +1,82 @@
+#! /bin/sh
+
+# edit this to taste; note that you can also override via the environment:
+case "$CC" in
+ "") CC=cc
+esac
+
+if test -f config.h; then :; else
+ echo "Creating basic config.h..."
+ cat >config.h <<'END_OF_CONFIG_H'
+/* A bootstrap version of config.h, for systems which can't
+ auto-configure due to a lack of a working sed. If you are on
+ a sufficiently odd machine you may need to hand-tweak this file.
+
+ Regardless, once you get a working version of sed you really should
+ re-build starting with a run of "configure", as the bootstrap
+ version is almost certainly more crippled than it needs to be on
+ your machine.
+*/
+
+#define PACKAGE "sed"
+#define VERSION "@VERSION@-boot"
+#define SED_FEATURE_VERSION "@SED_FEATURE_VERSION@"
+#define BOOTSTRAP 1
+
+/* Define if your compiler/headers don't support const. */
+#undef const
+
+/* Undefine if headers have conflicting definition. */
+#define mbstate_t int
+
+/* Toggle if you encounter errors in lib/mkstemp.c. */
+#define HAVE_UNISTD_H
+#define HAVE_FCNTL_H
+#undef HAVE_SYS_FILE_H
+#undef HAVE_IO_H
+
+/* Undefine if <stdio.h> or <sys/types.h> has conflicting definition. */
+#define size_t unsigned
+#define ssize_t int
+
+/* If your antique compiler doesn't grok ``void *'', then #define VOID char */
+#undef VOID
+
+
+/* All other config.h.in options intentionally omitted. Report as a
+ bug if you need extra "#define"s in here. */
+END_OF_CONFIG_H
+fi
+
+# tell the user what we're doing from here on...
+set -x -e
+
+# the ``|| exit 1''s are for fail-stop; set -e doesn't work on some systems
+
+rm -f lib/*.o sed/*.o sed/sed
+cd lib || exit 1
+rm -f regex.h
+cp regex_.h regex.h
+${CC} -DHAVE_CONFIG_H -I.. -I. -c alloca.c
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getline.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getopt.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c getopt1.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memchr.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memcmp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c memmove.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c mkstemp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c strverscmp.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c obstack.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c regex.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c strerror.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -c utils.c || exit 1
+
+cd ../sed || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c sed.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c fmt.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c compile.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c execute.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c mbcs.c || exit 1
+${CC} -DHAVE_CONFIG_H -I.. -I. -I../lib -c regexp.c || exit 1
+
+${CC} -o sed *.o ../lib/*.o || exit 1
diff --git a/config/getline.m4 b/config/getline.m4
new file mode 100644
index 0000000..ff8b5f4
--- /dev/null
+++ b/config/getline.m4
@@ -0,0 +1,41 @@
+#serial 4
+
+dnl See if there's a working, system-supplied version of the getline function.
+dnl We can't just do AC_REPLACE_FUNCS(getline) because some systems
+dnl have a function by that name in -linet that doesn't have anything
+dnl to do with the function we need.
+AC_DEFUN([AM_FUNC_GETLINE],
+[dnl
+ am_getline_needs_run_time_check=no
+ AC_CHECK_FUNC(getline,
+ dnl Found it in some library. Verify that it works.
+ am_getline_needs_run_time_check=yes,
+ am_cv_func_working_getline=no)
+ if test $am_getline_needs_run_time_check = yes; then
+ AC_CACHE_CHECK([for working getline function], am_cv_func_working_getline,
+ [echo fooN |tr -d '\012'|tr N '\012' > conftest.data
+ AC_TRY_RUN([
+# include <stdio.h>
+# include <sys/types.h>
+# include <string.h>
+ int main ()
+ { /* Based on a test program from Karl Heuer. */
+ char *line = NULL;
+ size_t siz = 0;
+ int len;
+ FILE *in = fopen ("./conftest.data", "r");
+ if (!in)
+ return 1;
+ len = getline (&line, &siz, in);
+ /* return rather than exit: <stdlib.h> is not included, so exit would be undeclared. */
+ return (len == 4 && line && strcmp (line, "foo\n") == 0) ? 0 : 1;
+ }
+ ], am_cv_func_working_getline=yes dnl The library version works.
+ , am_cv_func_working_getline=no dnl The library version does NOT work.
+ , am_cv_func_working_getline=no dnl We're cross compiling.
+ )])
+ fi
+
+ if test $am_cv_func_working_getline = no; then
+ AC_LIBOBJ(getline)
+ fi
+])
diff --git a/config/gettext-ver.m4 b/config/gettext-ver.m4
new file mode 100644
index 0000000..7e553f3
--- /dev/null
+++ b/config/gettext-ver.m4
@@ -0,0 +1 @@
+AC_DEFUN([AM_GNU_GETTEXT_VERSION], [])
diff --git a/config/help2man b/config/help2man
new file mode 100755
index 0000000..5d4377b
--- /dev/null
+++ b/config/help2man
@@ -0,0 +1,559 @@
+#!/usr/bin/env perl
+
+# Generate a short man page from --help and --version output.
+# Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software
+# Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Written by Brendan O'Dea <bod@debian.org>
+# Available from ftp://ftp.gnu.org/gnu/help2man/
+
+use 5.005;
+use strict;
+use Getopt::Long;
+use Text::Tabs qw(expand);
+use POSIX qw(strftime setlocale LC_TIME);
+
+my $this_program = 'help2man';
+my $this_version = '1.28';
+my $version_info = <<EOT;
+GNU $this_program $this_version
+
+Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Written by Brendan O'Dea <bod\@debian.org>
+EOT
+
+my $help_info = <<EOT;
+`$this_program' generates a man page out of `--help' and `--version' output.
+
+Usage: $this_program [OPTION]... EXECUTABLE
+
+ -n, --name=STRING description for the NAME paragraph
+ -s, --section=SECTION section number for manual page (1, 6, 8)
+ -m, --manual=TEXT name of manual (User Commands, ...)
+ -S, --source=TEXT source of program (FSF, Debian, ...)
+ -i, --include=FILE include material from `FILE'
+ -I, --opt-include=FILE include material from `FILE' if it exists
+ -o, --output=FILE send output to `FILE'
+ -p, --info-page=TEXT name of Texinfo manual
+ -N, --no-info suppress pointer to Texinfo manual
+ --help print this help, then exit
+ --version print version number, then exit
+
+EXECUTABLE should accept `--help' and `--version' options although
+alternatives may be specified using:
+
+ -h, --help-option=STRING help option string
+ -v, --version-option=STRING version option string
+
+Report bugs to <bug-help2man\@gnu.org>.
+EOT
+
+my $section = 1;
+my $manual = '';
+my $source = '';
+my $help_option = '--help';
+my $version_option = '--version';
+my ($opt_name, @opt_include, $opt_output, $opt_info, $opt_no_info);
+
+my %opt_def = (
+ 'n|name=s' => \$opt_name,
+ 's|section=s' => \$section,
+ 'm|manual=s' => \$manual,
+ 'S|source=s' => \$source,
+ 'i|include=s' => sub { push @opt_include, [ pop, 1 ] },
+ 'I|opt-include=s' => sub { push @opt_include, [ pop, 0 ] },
+ 'o|output=s' => \$opt_output,
+ 'p|info-page=s' => \$opt_info,
+ 'N|no-info' => \$opt_no_info,
+ 'h|help-option=s' => \$help_option,
+ 'v|version-option=s' => \$version_option,
+);
+
+# Parse options.
+Getopt::Long::config('bundling');
+GetOptions (%opt_def,
+ help => sub { print $help_info; exit },
+ version => sub { print $version_info; exit },
+) or die $help_info;
+
+die $help_info unless @ARGV == 1;
+
+my %include = ();
+my %append = ();
+my @include = (); # retain order given in include file
+
+# Process include file (if given). Format is:
+#
+# [section name]
+# verbatim text
+#
+# or
+#
+# /pattern/
+# verbatim text
+#
+
+while (@opt_include)
+{
+ my ($inc, $required) = @{shift @opt_include};
+
+ next unless -f $inc or $required;
+ die "$this_program: can't open `$inc' ($!)\n"
+ unless open INC, $inc;
+
+ my $key;
+ my $hash = \%include;
+
+ while (<INC>)
+ {
+ # [section]
+ if (/^\[([^]]+)\]/)
+ {
+ $key = uc $1;
+ $key =~ s/^\s+//;
+ $key =~ s/\s+$//;
+ $hash = \%include;
+ push @include, $key unless $include{$key};
+ next;
+ }
+
+ # /pattern/
+ if (m!^/(.*)/([ims]*)!)
+ {
+ my $pat = $2 ? "(?$2)$1" : $1;
+
+ # Check pattern.
+ eval { $key = qr($pat) };
+ if ($@)
+ {
+ $@ =~ s/ at .*? line \d.*//;
+ die "$inc:$.:$@";
+ }
+
+ $hash = \%append;
+ next;
+ }
+
+ # Check for options before the first section--anything else is
+ # silently ignored, allowing the first for comments and
+ # revision info.
+ unless ($key)
+ {
+ # handle options
+ if (/^-/)
+ {
+ local @ARGV = split;
+ GetOptions %opt_def;
+ }
+
+ next;
+ }
+
+ $hash->{$key} ||= '';
+ $hash->{$key} .= $_;
+ }
+
+ close INC;
+
+ die "$this_program: no valid information found in `$inc'\n"
+ unless $key;
+}
+
+# Compress trailing blank lines.
+for my $hash (\(%include, %append))
+{
+ for (keys %$hash) { $hash->{$_} =~ s/\n+$/\n/ }
+}
+
+# Turn off localisation of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+# Turn off localisation of date (for strftime).
+setlocale LC_TIME, 'C';
+
+# Grab help and version info from executable.
+my ($help_text, $version_text) = map {
+ join '', map { s/ +$//; expand $_ } `$ARGV[0] $_ 2>/dev/null`
+ or die "$this_program: can't get `$_' info from $ARGV[0]\n"
+} $help_option, $version_option;
+
+my $date = strftime "%B %Y", localtime;
+(my $program = $ARGV[0]) =~ s!.*/!!;
+my $package = $program;
+my $version;
+
+if ($opt_output)
+{
+ unlink $opt_output
+ or die "$this_program: can't unlink $opt_output ($!)\n"
+ if -e $opt_output;
+
+ open STDOUT, ">$opt_output"
+ or die "$this_program: can't create $opt_output ($!)\n";
+}
+
+# The first line of the --version information is assumed to be in one
+# of the following formats:
+#
+# <version>
+# <program> <version>
+# {GNU,Free} <program> <version>
+# <program> ({GNU,Free} <package>) <version>
+# <program> - {GNU,Free} <package> <version>
+#
+# and separated from any copyright/author details by a blank line.
+
+($_, $version_text) = split /\n+/, $version_text, 2;
+
+if (/^(\S+) +\(((?:GNU|Free) +[^)]+)\) +(.*)/ or
+ /^(\S+) +- *((?:GNU|Free) +\S+) +(.*)/)
+{
+ $program = $1;
+ $package = $2;
+ $version = $3;
+}
+elsif (/^((?:GNU|Free) +)?(\S+) +(.*)/)
+{
+ $program = $2;
+ $package = $1 ? "$1$2" : $2;
+ $version = $3;
+}
+else
+{
+ $version = $_;
+}
+
+$program =~ s!.*/!!;
+
+# No info for `info' itself.
+$opt_no_info = 1 if $program eq 'info';
+
+# --name overrides --include contents.
+$include{NAME} = "$program \\- $opt_name\n" if $opt_name;
+
+# Default (useless) NAME paragraph.
+$include{NAME} ||= "$program \\- manual page for $program $version\n";
+
+# Man pages traditionally have the page title in caps.
+my $PROGRAM = uc $program;
+
+# Set default page head/footers
+$source ||= "$program $version";
+unless ($manual)
+{
+ for ($section)
+ {
+ if (/^(1[Mm]|8)/) { $manual = 'System Administration Utilities' }
+ elsif (/^6/) { $manual = 'Games' }
+ else { $manual = 'User Commands' }
+ }
+}
+
+# Extract usage clause(s) [if any] for SYNOPSIS.
+if ($help_text =~ s/^Usage:( +(\S+))(.*)((?:\n(?: {6}\1| *or: +\S).*)*)//m)
+{
+ my @syn = $2 . $3;
+
+ if ($_ = $4)
+ {
+ s/^\n//;
+ for (split /\n/) { s/^ *(or: +)?//; push @syn, $_ }
+ }
+
+ my $synopsis = '';
+ for (@syn)
+ {
+ $synopsis .= ".br\n" if $synopsis;
+ s!^\S*/!!;
+ s/^(\S+) *//;
+ $synopsis .= ".B $1\n";
+ s/\s+$//;
+ s/(([][]|\.\.+)+)/\\fR$1\\fI/g;
+ s/^/\\fI/ unless s/^\\fR//;
+ $_ .= '\fR';
+ s/(\\fI)( *)/$2$1/g;
+ s/\\fI\\fR//g;
+ s/^\\fR//;
+ s/\\fI$//;
+ s/^\./\\&./;
+
+ $synopsis .= "$_\n";
+ }
+
+ $include{SYNOPSIS} ||= $synopsis;
+}
+
+# Process text, initial section is DESCRIPTION.
+my $sect = 'DESCRIPTION';
+$_ = "$help_text\n\n$version_text";
+
+# Normalise paragraph breaks.
+s/^\n+//;
+s/\n*$/\n/;
+s/\n\n+/\n\n/g;
+
+# Temporarily exchange leading dots, apostrophes and backslashes for
+# tokens.
+s/^\./\x80/mg;
+s/^'/\x81/mg;
+s/\\/\x82/g;
+
+# Start a new paragraph (if required) for these.
+s/([^\n])\n(Report +bugs|Email +bug +reports +to|Written +by)/$1\n\n$2/g;
+
+sub convert_option;
+
+while (length)
+{
+ # Convert some standard paragraph names.
+ if (s/^(Options|Examples): *\n//)
+ {
+ $sect = uc $1;
+ next;
+ }
+
+ # Copyright section
+ if (/^Copyright +[(\xa9]/)
+ {
+ $sect = 'COPYRIGHT';
+ $include{$sect} ||= '';
+ $include{$sect} .= ".PP\n" if $include{$sect};
+
+ my $copy;
+ ($copy, $_) = split /\n\n/, $_, 2;
+
+ for ($copy)
+ {
+ # Add back newline
+ s/\n*$/\n/;
+
+ # Convert iso8859-1 copyright symbol or (c) to nroff
+ # character.
+ s/^Copyright +(?:\xa9|\([Cc]\))/Copyright \\(co/mg;
+
+ # Insert line breaks before additional copyright messages
+ # and the disclaimer.
+ s/(.)\n(Copyright |This +is +free +software)/$1\n.br\n$2/g;
+
+ # Join hyphenated lines.
+ s/([A-Za-z])-\n */$1/g;
+ }
+
+ $include{$sect} .= $copy;
+ $_ ||= '';
+ next;
+ }
+
+ # Catch bug report text.
+ if (/^(Report +bugs|Email +bug +reports +to) /)
+ {
+ $sect = 'REPORTING BUGS';
+ }
+
+ # Author section.
+ elsif (/^Written +by/)
+ {
+ $sect = 'AUTHOR';
+ }
+
+ # Examples, indicated by an indented leading $, % or > are
+ # rendered in a constant width font.
+ if (/^( +)([\$\%>] )\S/)
+ {
+ my $indent = $1;
+ my $prefix = $2;
+ my $break = '.IP';
+ $include{$sect} ||= '';
+ while (s/^$indent\Q$prefix\E(\S.*)\n*//)
+ {
+ $include{$sect} .= "$break\n\\f(CW$prefix$1\\fR\n";
+ $break = '.br';
+ }
+
+ next;
+ }
+
+ my $matched = '';
+ $include{$sect} ||= '';
+
+ # Sub-sections have a trailing colon and the second line indented.
+ if (s/^(\S.*:) *\n / /)
+ {
+ $matched .= $& if %append;
+ $include{$sect} .= qq(.SS "$1"\n);
+ }
+
+ my $indent = 0;
+ my $content = '';
+
+ # Option with description.
+ if (s/^( {1,10}([+-]\S.*?))(?:( +(?!-))|\n( {20,}))(\S.*)\n//)
+ {
+ $matched .= $& if %append;
+ $indent = length ($4 || "$1$3");
+ $content = ".TP\n\x83$2\n\x83$5\n";
+ unless ($4)
+ {
+ # Indent may be different on second line.
+ $indent = length $& if /^ {20,}/;
+ }
+ }
+
+ # Option without description.
+ elsif (s/^ {1,10}([+-]\S.*)\n//)
+ {
+ $matched .= $& if %append;
+ $content = ".HP\n\x83$1\n";
+ $indent = 80; # not continued
+ }
+
+ # Indented paragraph with tag.
+ elsif (s/^( +(\S.*?) +)(\S.*)\n//)
+ {
+ $matched .= $& if %append;
+ $indent = length $1;
+ $content = ".TP\n\x83$2\n\x83$3\n";
+ }
+
+ # Indented paragraph.
+ elsif (s/^( +)(\S.*)\n//)
+ {
+ $matched .= $& if %append;
+ $indent = length $1;
+ $content = ".IP\n\x83$2\n";
+ }
+
+ # Left justified paragraph.
+ else
+ {
+ s/(.*)\n//;
+ $matched .= $& if %append;
+ $content = ".PP\n" if $include{$sect};
+ $content .= "$1\n";
+ }
+
+ # Append continuations.
+ while (s/^ {$indent}(\S.*)\n//)
+ {
+ $matched .= $& if %append;
+ $content .= "\x83$1\n"
+ }
+
+ # Move to next paragraph.
+ s/^\n+//;
+
+ for ($content)
+ {
+ # Leading dot and apostrophe protection.
+ s/\x83\./\x80/g;
+ s/\x83'/\x81/g;
+ s/\x83//g;
+
+ # Convert options.
+ s/(^| )(-[][\w=-]+)/$1 . convert_option $2/mge;
+ }
+
+ # Check if matched paragraph contains /pat/.
+ if (%append)
+ {
+ for my $pat (keys %append)
+ {
+ if ($matched =~ $pat)
+ {
+ $content .= ".PP\n" unless $append{$pat} =~ /^\./;
+ $content .= $append{$pat};
+ }
+ }
+ }
+
+ $include{$sect} .= $content;
+}
+
+# Refer to the real documentation.
+unless ($opt_no_info)
+{
+ my $info_page = $opt_info || $program;
+
+ $sect = 'SEE ALSO';
+ $include{$sect} ||= '';
+ $include{$sect} .= ".PP\n" if $include{$sect};
+ $include{$sect} .= <<EOT;
+The full documentation for
+.B $program
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B $program
+programs are properly installed at your site, the command
+.IP
+.B info $info_page
+.PP
+should give you access to the complete manual.
+EOT
+}
+
+# Output header.
+print <<EOT;
+.\\" DO NOT MODIFY THIS FILE! It was generated by $this_program $this_version.
+.TH $PROGRAM "$section" "$date" "$source" "$manual"
+EOT
+
+# Section ordering.
+my @pre = qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES);
+my @post = ('AUTHOR', 'REPORTING BUGS', 'COPYRIGHT', 'SEE ALSO');
+my $filter = join '|', @pre, @post;
+
+# Output content.
+for (@pre, (grep ! /^($filter)$/o, @include), @post)
+{
+ if ($include{$_})
+ {
+ my $quote = /\W/ ? '"' : '';
+ print ".SH $quote$_$quote\n";
+
+ for ($include{$_})
+ {
+ # Replace leading dot, apostrophe and backslash tokens.
+ s/\x80/\\&./g;
+ s/\x81/\\&'/g;
+ s/\x82/\\e/g;
+ print;
+ }
+ }
+}
+
+exit;
+
+# Convert option dashes to \- to stop nroff from hyphenating 'em, and
+# embolden. Option arguments get italicised.
+sub convert_option
+{
+ local $_ = '\fB' . shift;
+
+ s/-/\\-/g;
+ unless (s/\[=(.*)\]$/\\fR[=\\fI$1\\fR]/)
+ {
+ s/=(.)/\\fR=\\fI$1/;
+ s/ (.)/ \\fI$1/;
+ $_ .= '\fR';
+ }
+
+ $_;
+}
diff --git a/config/stdbool.m4 b/config/stdbool.m4
new file mode 100644
index 0000000..23f4954
--- /dev/null
+++ b/config/stdbool.m4
@@ -0,0 +1,66 @@
+# Check for stdbool.h that conforms to C99.
+
+# Copyright (C) 2002-2003 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# This macro is only needed in autoconf <= 2.54. Newer versions of autoconf
+# have this macro built-in.
+
+AC_DEFUN([AC_HEADER_STDBOOL],
+ [AC_CACHE_CHECK([for stdbool.h that conforms to C99],
+ [ac_cv_header_stdbool_h],
+ [AC_TRY_COMPILE(
+ [
+ #include <stdbool.h>
+ #ifndef bool
+ "error: bool is not defined"
+ #endif
+ #ifndef false
+ "error: false is not defined"
+ #endif
+ #if false
+ "error: false is not 0"
+ #endif
+ #ifndef true
+ "error: true is not defined"
+ #endif
+ #if true != 1
+ "error: true is not 1"
+ #endif
+ #ifndef __bool_true_false_are_defined
+ "error: __bool_true_false_are_defined is not defined"
+ #endif
+
+ struct s { _Bool s: 1; _Bool t; } s;
+
+ char a[true == 1 ? 1 : -1];
+ char b[false == 0 ? 1 : -1];
+ char c[__bool_true_false_are_defined == 1 ? 1 : -1];
+ char d[(bool) -0.5 == true ? 1 : -1];
+ bool e = &s;
+ char f[(_Bool) -0.0 == false ? 1 : -1];
+ char g[true];
+ char h[sizeof (_Bool)];
+ char i[sizeof s.t];
+ ],
+ [ return !a + !b + !c + !d + !e + !f + !g + !h + !i; ],
+ [ac_cv_header_stdbool_h=yes],
+ [ac_cv_header_stdbool_h=no])])
+ AC_CHECK_TYPES([_Bool])
+ if test $ac_cv_header_stdbool_h = yes; then
+ AC_DEFINE(HAVE_STDBOOL_H, 1, [Define to 1 if stdbool.h conforms to C99.])
+ fi])
diff --git a/config/strverscmp.m4 b/config/strverscmp.m4
new file mode 100644
index 0000000..bb82336
--- /dev/null
+++ b/config/strverscmp.m4
@@ -0,0 +1,24 @@
+# strverscmp.m4 serial 1
+dnl Copyright (C) 2002 Free Software Foundation, Inc.
+dnl This file is free software, distributed under the terms of the GNU
+dnl General Public License. As a special exception to the GNU General
+dnl Public License, this file may be distributed as part of a program
+dnl that contains a configuration script generated by Autoconf, under
+dnl the same distribution terms as the rest of that program.
+
+AC_DEFUN([gl_FUNC_STRVERSCMP],
+[
+ dnl Persuade glibc <string.h> to declare strverscmp().
+ AC_REQUIRE([AC_GNU_SOURCE])
+
+ AC_REPLACE_FUNCS(strverscmp)
+ if test $ac_cv_func_strverscmp = no; then
+ gl_PREREQ_STRVERSCMP
+ fi
+])
+
+# Prerequisites of lib/strverscmp.c.
+AC_DEFUN([gl_PREREQ_STRVERSCMP], [
+ :
+])
+
diff --git a/config/texi2dvi b/config/texi2dvi
new file mode 100755
index 0000000..010b586
--- /dev/null
+++ b/config/texi2dvi
@@ -0,0 +1,660 @@
+#! /bin/sh
+# texi2dvi --- produce DVI (or PDF) files from Texinfo (or LaTeX) sources.
+# $Id: texi2dvi,v 1.14 2003/02/05 00:42:33 karl Exp $
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001,
+# 2002, 2003 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can either send email to this
+# program's maintainer or write to: The Free Software Foundation,
+# Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
+#
+# Original author: Noah Friedman <friedman@gnu.org>.
+#
+# Please send bug reports, etc. to bug-texinfo@gnu.org.
+# If possible, please send a copy of the output of the script called with
+# the `--debug' option when making a bug report.
+
+# This string is expanded by rcs automatically when this file is checked out.
+rcs_revision='$Revision: 1.14 $'
+rcs_version=`set - $rcs_revision; echo $2`
+program=`echo $0 | sed -e 's!.*/!!'`
+version="texi2dvi (GNU Texinfo 4.5) $rcs_version
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+There is NO warranty. You may redistribute this software
+under the terms of the GNU General Public License.
+For more information about these matters, see the files named COPYING."
+
+usage="Usage: $program [OPTION]... FILE...
+
+Run each Texinfo or LaTeX FILE through TeX in turn until all
+cross-references are resolved, building all indices. The directory
+containing each FILE is searched for included files. The suffix of FILE
+is used to determine its language (LaTeX or Texinfo).
+
+Makeinfo is used to perform Texinfo macro expansion before running TeX
+when needed.
+
+Operation modes:
+ -b, --batch no interaction
+ -c, --clean remove all auxiliary files
+ -D, --debug turn on shell debugging (set -x)
+ -h, --help display this help and exit successfully
+ -o, --output=OFILE leave output in OFILE (implies --clean);
+ Only one input FILE may be specified in this case
+ -q, --quiet no output unless errors (implies --batch)
+ -s, --silent same as --quiet
+ -v, --version display version information and exit successfully
+ -V, --verbose report on what is done
+
+TeX tuning:
+ -@ use @input instead of \input; for preloaded Texinfo
+ -e, -E, --expand force macro expansion using makeinfo
+ -I DIR search DIR for Texinfo files
+ -l, --language=LANG specify the LANG of FILE (LaTeX or Texinfo)
+ -p, --pdf use pdftex or pdflatex for processing
+ -t, --texinfo=CMD insert CMD after @setfilename in copy of input file
+ multiple values accumulate
+
+The values of the BIBTEX, LATEX (or PDFLATEX), MAKEINDEX, MAKEINFO,
+TEX (or PDFTEX), and TEXINDEX environment variables are used to run
+those commands, if they are set.
+
+Email bug reports to <bug-texinfo@gnu.org>,
+general questions and discussion to <help-texinfo@gnu.org>.
+Texinfo home page: http://www.gnu.org/software/texinfo/"
+
+# Initialize variables for option overriding and otherwise.
+# Don't use `unset' since old bourne shells don't have this command.
+# Instead, assign them an empty value.
+batch=false # eval for batch mode
+clean=
+debug=
+escape='\'
+expand= # t for expansion via makeinfo
+miincludes= # makeinfo include path
+oformat=dvi
+oname= # --output
+quiet= # by default let the tools' message be displayed
+set_language=
+textra=
+tmpdir=${TMPDIR:-/tmp}/t2d$$ # avoid collisions on 8.3 filesystems.
+txincludes= # TEXINPUTS extensions, with trailing colon
+txiprereq=19990129 # minimum texinfo.tex version to have macro expansion
+verbose=false # echo for verbose mode
+
+orig_pwd=`pwd`
+
+# Systems which define $COMSPEC or $ComSpec use semicolons to separate
+# directories in TEXINPUTS.
+if test -n "$COMSPEC$ComSpec"; then
+ path_sep=";"
+else
+ path_sep=":"
+fi
+
+# Pacify verbose cds.
+CDPATH=${ZSH_VERSION+.}$path_sep
+
+# In case someone crazy insists on using grep -E.
+: ${EGREP=egrep}
+
+# Save this so we can construct a new TEXINPUTS path for each file.
+TEXINPUTS_orig="$TEXINPUTS"
+# Unfortunately makeindex does not read TEXINPUTS.
+INDEXSTYLE_orig="$INDEXSTYLE"
+export TEXINPUTS INDEXSTYLE
+
+# Push a token among the arguments that will be used to notice when we
+# ended options/arguments parsing.
+# Use "set dummy ...; shift" rather than "set - ..." because on
+# Solaris set - turns off set -x (but keeps set -e).
+# Use ${1+"$@"} rather than "$@" because Digital Unix and Ultrix 4.3
+# still expand "$@" to a single argument (the empty string) rather
+# than nothing at all.
+arg_sep="$$--$$"
+set dummy ${1+"$@"} "$arg_sep"; shift
+
+#
+# Parse command line arguments.
+while test x"$1" != x"$arg_sep"; do
+
+ # Handle --option=value by splitting apart and putting back on argv.
+ case "$1" in
+ --*=*)
+ opt=`echo "$1" | sed -e 's/=.*//'`
+ val=`echo "$1" | sed -e 's/[^=]*=//'`
+ shift
+ set dummy "$opt" "$val" ${1+"$@"}; shift
+ ;;
+ esac
+
+ # This recognizes --quark as --quiet. So what.
+ case "$1" in
+ -@ ) escape=@;;
+ # Silently and without documentation accept -b and --b[atch] as synonyms.
+ -b | --b*) batch=eval;;
+ -q | -s | --q* | --s*) quiet=t; batch=eval;;
+ -c | --c*) clean=t;;
+ -D | --d*) debug=t;;
+ -e | -E | --e*) expand=t;;
+ -h | --h*) echo "$usage"; exit 0;;
+ -I | --I*)
+ shift
+ miincludes="$miincludes -I $1"
+ txincludes="$txincludes$1$path_sep"
+ ;;
+ -l | --l*) shift; set_language=$1;;
+ -o | --o*)
+ shift
+ clean=t
+ case "$1" in
+ /* | ?:/*) oname=$1;;
+ *) oname="$orig_pwd/$1";;
+ esac;;
+ -p | --p*) oformat=pdf;;
+ -t | --t*) shift; textra="$textra\\
+$1";;
+ -v | --vers*) echo "$version"; exit 0;;
+ -V | --verb*) verbose=echo;;
+ --) # What remains are not options.
+ shift
+ while test x"$1" != x"$arg_sep"; do
+ set dummy ${1+"$@"} "$1"; shift
+ shift
+ done
+ break;;
+ -*)
+ echo "$0: Unknown or ambiguous option \`$1'." >&2
+ echo "$0: Try \`--help' for more information." >&2
+ exit 1;;
+ *) set dummy ${1+"$@"} "$1"; shift;;
+ esac
+ shift
+done
+# Pop the token
+shift
+
+# Interpret remaining command line args as filenames.
+case $# in
+ 0)
+ echo "$0: Missing file arguments." >&2
+ echo "$0: Try \`--help' for more information." >&2
+ exit 2
+ ;;
+ 1) ;;
+ *)
+ if test -n "$oname"; then
+ echo "$0: Can't use option \`--output' with more than one argument." >&2
+ exit 2
+ fi
+ ;;
+esac
+
+# Prepare the temporary directory. Remove it at exit, unless debugging.
+if test -z "$debug"; then
+ trap "cd / && rm -rf $tmpdir" 0 1 2 15
+fi
+
+# Create the temporary directory with strict rights
+(umask 077 && mkdir $tmpdir) || exit 1
+
+# Prepare the tools we might need. This may be extra work in some
+# cases, but improves the readability of the script.
+utildir=$tmpdir/utils
+mkdir $utildir || exit 1
+
+# A sed script that preprocesses Texinfo sources in order to keep the
+# iftex sections only. We want to remove non TeX sections, and
+# comment (with `@c texi2dvi') TeX sections so that makeinfo does not
+# try to parse them. Nevertheless, while commenting TeX sections,
+# don't comment @macro/@end macro so that makeinfo does propagate
+# them. Unfortunately makeinfo --iftex --no-ifhtml --no-ifinfo
+# doesn't work well enough (yet) to use that, so work around with sed.
+comment_iftex_sed=$utildir/comment.sed
+cat <<EOF >$comment_iftex_sed
+/^@tex/,/^@end tex/{
+ s/^/@c texi2dvi/
+}
+/^@iftex/,/^@end iftex/{
+ s/^/@c texi2dvi/
+ /^@c texi2dvi@macro/,/^@c texi2dvi@end macro/{
+ s/^@c texi2dvi//
+ }
+}
+/^@html/,/^@end html/{
+ s/^/@c (texi2dvi)/
+}
+/^@ifhtml/,/^@end ifhtml/{
+ s/^/@c (texi2dvi)/
+}
+/^@ifnottex/,/^@end ifnottex/{
+ s/^/@c (texi2dvi)/
+}
+/^@ifinfo/,/^@end ifinfo/{
+ /^@node/p
+ /^@menu/,/^@end menu/p
+ t
+ s/^/@c (texi2dvi)/
+}
+s/^@ifnotinfo/@c texi2dvi@ifnotinfo/
+s/^@end ifnotinfo/@c texi2dvi@end ifnotinfo/
+EOF
+# Uncommenting is simple: Remove any leading `@c texi2dvi'.
+uncomment_iftex_sed=$utildir/uncomment.sed
+cat <<EOF >$uncomment_iftex_sed
+s/^@c texi2dvi//
+EOF
+
+# A shell script that computes the list of xref files.
+# Takes the filename (without extension) of which we look for xref
+# files as argument. The index files must be reported last.
+get_xref_files=$utildir/get_xref.sh
+cat <<\EOF >$get_xref_files
+#! /bin/sh
+
+# Get list of xref files (indexes, tables and lists).
+# Find all files having root filename with a two-letter extension,
+# saves the ones that are really Texinfo-related files. .?o? catches
+# many files: .toc, .log, LaTeX tables and lists, FiXme's .lox, maybe more.
+for this_file in "$1".?o? "$1".aux "$1".?? "$1".idx; do
+ # If file is empty, skip it.
+ test -s "$this_file" || continue
+ # If the file is not suitable to be an index or xref file, don't
+ # process it. The file can't be if its first character is not a
+ # backslash or single quote.
+ first_character=`sed -n '1s/^\(.\).*$/\1/p;q' $this_file`
+ if test "x$first_character" = "x\\" \
+ || test "x$first_character" = "x'"; then
+ xref_files="$xref_files ./$this_file"
+ fi
+done
+echo "$xref_files"
+EOF
+chmod 500 $get_xref_files
+
+# File descriptor usage:
+# 0 standard input
+# 1 standard output (--verbose messages)
+# 2 standard error
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 5 tools output (turned off by --quiet)
+
+# Tools' output. If quiet, discard, else redirect to the message flow.
+if test "$quiet" = t; then
+ exec 5>/dev/null
+else
+ exec 5>&1
+fi
+
+# Enable tracing
+test "$debug" = t && set -x
+
+#
+# TeXify files.
+
+for command_line_filename in ${1+"$@"}; do
+ $verbose "Processing $command_line_filename ..."
+
+ # If COMMAND_LINE_FILENAME is not absolute (no leading `/' or drive letter
+ # `X:/'), prepend `./' so that tools don't take e.g. --debug.tex as an option.
+ echo "$command_line_filename" | $EGREP '^(/|[A-Za-z]:/)' >/dev/null \
+ || command_line_filename="./$command_line_filename"
+
+ # See if the file exists. If it doesn't we're in trouble since, even
+ # though the user may be able to reenter a valid filename at the tex
+ # prompt (assuming they're attending the terminal), this script won't
+ # be able to find the right xref files and so forth.
+ if test ! -r "$command_line_filename"; then
+ echo "$0: Could not read $command_line_filename, skipping." >&2
+ continue
+ fi
+
+ # Get the absolute name of the directory containing the file. We want
+ # the full path because in clean mode we are in tmp, where a relative path
+ # has no meaning. The name is quoted so blanks and globs in it survive.
+ filename_dir=`echo "$command_line_filename" | sed 's!/[^/]*$!!;s!^$!.!'`
+ filename_dir=`cd "$filename_dir" >/dev/null && pwd`
+
+ # Strip directory part but leave extension.
+ filename_ext=`basename "$command_line_filename"`
+ # Strip extension.
+ filename_noext=`echo "$filename_ext" | sed 's/\.[^.]*$//'`
+ ext=`echo "$filename_ext" | sed 's/^.*\.//'`
+
+ # _src. Use same basename since we want to generate aux files with
+ # the same basename as the manual. If --expand, then output the
+ # macro-expanded file to here, else copy the original file.
+ tmpdir_src=$tmpdir/src
+ filename_src=$tmpdir_src/$filename_noext.$ext
+
+ # _xtr. The file with the user's extra commands.
+ tmpdir_xtr=$tmpdir/xtr
+ filename_xtr=$tmpdir_xtr/$filename_noext.$ext
+
+ # _bak. Copies of the previous xref files (another round is run if
+ # they differ from the new one).
+ tmpdir_bak=$tmpdir/bak
+
+ # Make all those directories and give up if we can't succeed.
+ mkdir $tmpdir_src $tmpdir_xtr $tmpdir_bak || exit 1
+
+ # Source file might include additional sources.
+ # We want `.:$orig_pwd' before anything else. (We'll add `.:' later
+ # after all other directories have been turned into absolute paths.)
+ # `.' goes first to ensure that any old .aux, .cps,
+ # etc. files in ${directory} don't get used in preference to fresher
+ # files in `.'. Include orig_pwd in case we are in clean mode, where
+ # we've cd'd to a temp directory.
+ common="$orig_pwd$path_sep$filename_dir$path_sep$txincludes"
+ TEXINPUTS="$common$TEXINPUTS_orig"
+ INDEXSTYLE="$common$INDEXSTYLE_orig"
+
+ # Convert relative paths to absolute paths, so we can run in another
+ # directory (e.g., in --clean mode, or during the macro-support
+ # detection.)
+ #
+ # Empty path components are meaningful to tex. We rewrite them
+ # as `EMPTY' so they don't get lost when we split on $path_sep.
+ TEXINPUTS=`echo $TEXINPUTS |sed 's/^:/EMPTY:/;s/:$/:EMPTY/;s/::/:EMPTY:/g'`
+ INDEXSTYLE=`echo $INDEXSTYLE |sed 's/^:/EMPTY:/;s/:$/:EMPTY/;s/::/:EMPTY:/g'`
+ save_IFS=$IFS
+ IFS=$path_sep
+ set x $TEXINPUTS; shift
+ TEXINPUTS=.
+ for dir
+ do
+ case $dir in
+ EMPTY)
+ TEXINPUTS=$TEXINPUTS$path_sep
+ ;;
+ [\\/]* | ?:[\\/]*) # Absolute paths don't need to be expanded.
+ TEXINPUTS=$TEXINPUTS$path_sep$dir
+ ;;
+ *)
+ abs=`cd "$dir" && pwd` && TEXINPUTS=$TEXINPUTS$path_sep$abs
+ ;;
+ esac
+ done
+ set x $INDEXSTYLE; shift
+ INDEXSTYLE=.
+ for dir
+ do
+ case $dir in
+ EMPTY)
+ INDEXSTYLE=$INDEXSTYLE$path_sep
+ ;;
+ [\\/]* | ?:[\\/]*) # Absolute paths don't need to be expanded.
+ INDEXSTYLE=$INDEXSTYLE$path_sep$dir
+ ;;
+ *)
+ abs=`cd "$dir" && pwd` && INDEXSTYLE=$INDEXSTYLE$path_sep$abs
+ ;;
+ esac
+ done
+ IFS=$save_IFS
+
+ # If the user explicitly specified the language, use that.
+ # Otherwise, if the first line is \input texinfo, assume it's texinfo.
+ # Otherwise, guess from the file extension.
+ if test -n "$set_language"; then
+ language=$set_language
+ elif sed 1q "$command_line_filename" | grep 'input texinfo' >/dev/null; then
+ language=texinfo
+ else
+ language=
+ fi
+
+ # Get the type of the file (latex or texinfo) from the given language
+ # we just guessed, or from the file extension if not set yet.
+ case ${language:-$filename_ext} in
+ [lL]a[tT]e[xX] | *.ltx | *.tex)
+ # Assume a LaTeX file. LaTeX needs bibtex and uses latex for
+ # compilation. No makeinfo.
+ bibtex=${BIBTEX:-bibtex}
+ makeinfo= # no point in running makeinfo on latex source.
+ texindex=${MAKEINDEX:-makeindex}
+ if test $oformat = dvi; then
+ tex=${LATEX:-latex}
+ else
+ tex=${PDFLATEX:-pdflatex}
+ fi
+ ;;
+
+ *)
+ # Assume a Texinfo file. Texinfo files need makeinfo, texindex and tex.
+ bibtex=
+ texindex=${TEXINDEX:-texindex}
+ if test $oformat = dvi; then
+ tex=${TEX:-tex}
+ else
+ tex=${PDFTEX:-pdftex}
+ fi
+ # Unless required by the user, makeinfo expansion is wanted only
+ # if texinfo.tex is too old.
+ if test "$expand" = t; then
+ makeinfo=${MAKEINFO:-makeinfo}
+ else
+ # Check if texinfo.tex performs macro expansion by looking for
+ # its version. The version is a date of the form YEAR-MO-DA.
+ # We don't need to use [0-9] to match the digits since anyway
+ # the comparison with $txiprereq, a number, will fail with non
+ # digits.
+ txiversion_tex=txiversion.tex
+ echo '\input texinfo.tex @bye' >$tmpdir/$txiversion_tex
+ # Run in the tmpdir to avoid leaving files.
+ eval `cd $tmpdir >/dev/null &&
+ $tex $txiversion_tex 2>/dev/null |
+ sed -n 's/^.*\[\(.*\)version \(....\)-\(..\)-\(..\).*$/txiformat=\1 txiversion="\2\3\4"/p'`
+ $verbose "texinfo.tex preloaded as \`$txiformat', version is \`$txiversion' ..."
+ if test "$txiprereq" -le "$txiversion" >/dev/null 2>&1; then
+ makeinfo=
+ else
+ makeinfo=${MAKEINFO:-makeinfo}
+ fi
+ # As long as we had to run TeX, offer the user this convenience
+ if test "$txiformat" = Texinfo; then
+ escape=@
+ fi
+ fi
+ ;;
+ esac
+
+ # Expand macro commands in the original source file using Makeinfo.
+ # Always use `end' footnote style, since the `separate' style
+ # generates different output (arguably this is a bug in -E).
+ # Discard main info output, the user asked to run TeX, not makeinfo.
+ if test -n "$makeinfo"; then
+ $verbose "Macro-expanding $command_line_filename to $filename_src ..."
+ sed -f $comment_iftex_sed "$command_line_filename" \
+ | $makeinfo --footnote-style=end -I "$filename_dir" $miincludes \
+ -o /dev/null --macro-expand=- \
+ | sed -f $uncomment_iftex_sed >"$filename_src"
+ filename_input=$filename_src
+ fi
+
+ # If makeinfo failed (or was not even run), use the original file as input.
+ if test $? -ne 0 \
+ || test ! -r "$filename_src"; then
+ $verbose "Reverting to $command_line_filename ..."
+ filename_input=$filename_dir/$filename_ext
+ fi
+
+ # Used most commonly for @finalout, @smallbook, etc.
+ if test -n "$textra"; then
+ $verbose "Inserting extra commands: $textra"
+ sed '/^@setfilename/a\
+'"$textra" "$filename_input" >$filename_xtr
+ filename_input=$filename_xtr
+ fi
+
+ # If clean mode was specified, then move to the temporary directory.
+ if test "$clean" = t; then
+ $verbose "cd $tmpdir_src"
+ cd "$tmpdir_src" || exit 1
+ fi
+
+ while :; do # will break out of loop below
+ orig_xref_files=`$get_xref_files "$filename_noext"`
+
+ # Save copies of originals for later comparison.
+ if test -n "$orig_xref_files"; then
+ $verbose "Backing up xref files: `echo $orig_xref_files | sed 's|\./||g'`"
+ cp $orig_xref_files $tmpdir_bak
+ fi
+
+ # Run bibtex on current file.
+ # - If its input (AUX) exists.
+ # - If AUX contains both `\bibdata' and `\bibstyle'.
+ # - If some citations are missing (LOG contains `Citation').
+ # or the LOG complains of a missing .bbl
+ #
+ # We run bibtex first, because I can see reasons for the indexes
+ # to change after bibtex is run, but I see no reason for the
+ # converse.
+ #
+ # Don't try to be too smart. Running bibtex only if the bbl file
+ # exists and is older than the LaTeX file is wrong, since the
+ # document might include files that have changed. Because there
+ # can be several AUX (if there are \include's), but a single LOG,
+ # looking for missing citations in LOG is easier, though we take
+ # the risk to match false messages.
+ if test -n "$bibtex" \
+ && test -r "$filename_noext.aux" \
+ && test -r "$filename_noext.log" \
+ && (grep '^\\bibdata[{]' "$filename_noext.aux" \
+ && grep '^\\bibstyle[{]' "$filename_noext.aux" \
+ && (grep 'Warning:.*Citation.*undefined' "$filename_noext.log" \
+ || grep 'No file .*\.bbl\.' "$filename_noext.log")) \
+ >/dev/null 2>&1; \
+ then
+ $verbose "Running $bibtex $filename_noext ..."
+ if $bibtex "$filename_noext" >&5; then :; else
+ echo "$0: $bibtex exited with bad status, quitting." >&2
+ exit 1
+ fi
+ fi
+
+ # What we'll run texindex on -- exclude non-index files.
+ # Since we know index files are last, it is correct to remove everything
+ # before .aux and .?o?. But don't really do <anything>o<anything>
+ # -- don't match whitespace as <anything>.
+ # Otherwise, if orig_xref_files contains something like
+ # foo.xo foo.whatever
+ # the space after the o will get matched.
+ index_files=`echo "$orig_xref_files" \
+ | sed "s!.*\.aux!!g;
+ s!./$filename_noext\.[^ ]o[^ ]!!g;
+ s/^[ ]*//;s/[ ]*$//"`
+ # Run texindex (or makeindex) on current index files. If they
+ # already exist, and after running TeX a first time the index
+ # files don't change, then there's no reason to run TeX again.
+ # But we won't know that if the index files are out of date or
+ # nonexistent.
+ if test -n "$texindex" && test -n "$index_files"; then
+ $verbose "Running $texindex $index_files ..."
+ if $texindex $index_files 2>&5 1>&2; then :; else
+ echo "$0: $texindex exited with bad status, quitting." >&2
+ exit 1
+ fi
+ fi
+
+ # Finally, run TeX.
+ # Prevent $ESCAPE from being interpreted by the shell if it happens
+ # to be `\'.
+ $batch tex_args="\\${escape}nonstopmode\ \\${escape}input"
+ cmd="$tex $tex_args $filename_input"
+ $verbose "Running $cmd ..."
+ if $cmd >&5; then :; else
+ echo "$0: $tex exited with bad status, quitting." >&2
+ echo "$0: see $filename_noext.log for errors." >&2
+ test "$clean" = t \
+ && cp "$filename_noext.log" "$orig_pwd"
+ exit 1
+ fi
+
+
+ # Decide if looping again is needed.
+ finished=t
+
+ # LaTeX (and the package changebar) report in the LOG file if it
+ # should be rerun. This is needed for files included from
+ # subdirs, since texi2dvi does not try to compare xref files in
+ # subdirs. Performing xref files test is still good since LaTeX
+ # does not report changes in xref files.
+ if grep "Rerun to get" "$filename_noext.log" >/dev/null 2>&1; then
+ finished=
+ fi
+
+ # Check if xref files changed.
+ new_xref_files=`$get_xref_files "$filename_noext"`
+ $verbose "Original xref files = `echo $orig_xref_files | sed 's|\./||g'`"
+ $verbose "New xref files = `echo $new_xref_files | sed 's|\./||g'`"
+
+ # If old and new lists don't at least have the same file list,
+ # then one file or another has definitely changed.
+ test "x$orig_xref_files" != "x$new_xref_files" && finished=
+
+ # File list is the same. We must compare each file until we find
+ # a difference.
+ if test -n "$finished"; then
+ for this_file in $new_xref_files; do
+ $verbose "Comparing xref file `echo $this_file | sed 's|\./||g'` ..."
+ # cmp -s returns nonzero exit status if files differ.
+ if cmp -s "$this_file" "$tmpdir_bak/$this_file"; then :; else
+ # We only need to keep comparing until we find one that
+ # differs, because we'll have to run texindex & tex again no
+ # matter how many more there might be.
+ finished=
+ $verbose "xref file `echo $this_file | sed 's|\./||g'` differed ..."
+ test "$debug" = t && diff -c "$tmpdir_bak/$this_file" "$this_file"
+ break
+ fi
+ done
+ fi
+
+ # If finished, exit the loop, else rerun the loop.
+ test -n "$finished" && break
+ done
+
+ # If we were in clean mode, compilation was in a tmp directory.
+ # Copy the DVI (or PDF) file to the --output name if one was given,
+ # else into the directory texi2dvi was started from ($orig_pwd).
+ # We also return to the original directory so that
+ # - the next file is processed in correct conditions
+ # - the temporary directory we were sitting in can be removed
+ if test -n "$clean"; then
+ if test -n "$oname"; then
+ dest=$oname
+ else
+ dest=$orig_pwd
+ fi
+ $verbose "Copying $oformat file from `pwd` to $dest"
+ cp -p "./$filename_noext.$oformat" "$dest"
+ cd / # in case $orig_pwd is on a different drive (for DOS)
+ cd "$orig_pwd" || exit 1
+ fi
+
+ # Remove temporary files from $orig_pwd, not `/', so the next relative FILE still resolves.
+ if test "x$debug" = "x"; then
+ $verbose "Removing $tmpdir_src $tmpdir_xtr $tmpdir_bak ..."
+ cd "$orig_pwd" || exit 1
+ rm -rf $tmpdir_src $tmpdir_xtr $tmpdir_bak
+ fi
+done
+
+$verbose "$0 done."
+exit 0 # exit successfully, not however we ended the loop.
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..e08c9a3
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,139 @@
+dnl Process this file with -*- autoconf -*- to produce a configure script.
+AC_INIT(sed, 4.1.2, bonzini@gnu.org, sed)
+AC_CONFIG_AUX_DIR(config)
+AC_CONFIG_SRCDIR([sed/sed.c])
+AM_CONFIG_HEADER(config.h:config_h.in)
+AC_PREREQ(2.53)
+AM_INIT_AUTOMAKE
+
+SED_FEATURE_VERSION=4.1
+AC_DEFINE_UNQUOTED(SED_FEATURE_VERSION, "$SED_FEATURE_VERSION",
+ [Define to the version of GNU sed whose features are supported by this sed.])
+AC_SUBST(SED_FEATURE_VERSION)
+
+AC_PROG_CC
+AC_PROG_RANLIB
+AC_GNU_SOURCE
+AC_AIX
+AC_MINIX
+AC_ISC_POSIX
+AC_SYS_LARGEFILE
+AC_SYS_LONG_FILE_NAMES
+
+AC_MSG_CHECKING([whether -lcP is needed])
+AC_TRY_RUN([
+#include <stdio.h>
+#include <errno.h>
+
+int main()
+{
+ FILE *fp;
+ int result;
+ errno = 0;
+ fp = fopen ("conftest.c", "r");
+ if (!fp) return 0; /* error, assume not needed */
+ result = fflush (fp) == EOF && errno == 0;
+ fclose (fp);
+ return result;
+}], [AC_MSG_RESULT(no)],
+ [AC_MSG_RESULT(yes)
+ LIBS="-lcP $LIBS"],
+ [AC_MSG_RESULT([assuming no])
+])
+
+AC_HEADER_DIRENT
+AC_CHECK_HEADERS(io.h limits.h locale.h stdarg.h alloca.h stddef.h errno.h \
+ wchar.h wctype.h sys/file.h, [], [], [AC_INCLUDES_DEFAULT])
+AC_C_CONST
+AC_TYPE_SIZE_T
+AC_CHECK_TYPE(ssize_t, int)
+
+AC_HEADER_STDBOOL
+if test "$ac_cv_type__Bool" = no; then
+ HAVE__BOOL=0
+else
+ HAVE__BOOL=1
+fi
+AC_SUBST(HAVE__BOOL)
+if test "$ac_cv_header_stdbool_h" = no; then
+ AC_CONFIG_FILES(lib/stdbool.h:lib/stdbool_.h)
+fi
+
+AC_FUNC_ALLOCA
+AC_FUNC_VPRINTF
+AM_FUNC_GETLINE
+AC_FUNC_OBSTACK
+AC_FUNC_MBRTOWC
+AC_TYPE_MBSTATE_T
+gl_FUNC_STRVERSCMP
+AC_REPLACE_FUNCS(memchr memcmp memmove strerror mkstemp)
+AC_CHECK_FUNCS(isatty bcopy bzero isascii memcpy memset strchr strtoul popen \
+ pathconf isblank fchown fchmod setlocale wcrtomb wcscoll btowc)
+
+AC_ARG_WITH(included-regex,
+[ --with-included-regex use included regex matcher (default=yes)], ,
+with_included_regex=yes)
+
+AC_ARG_ENABLE(regex-tests,
+[ --enable-regex-tests enable regex matcher regression tests (default=no)], ,
+enable_regex_tests=no)
+
+if test "x$with_included_regex" = xno; then
+ AC_CHECK_HEADERS(regex.h)
+ AC_CHECK_LIB(regex, re_search)
+ AC_CHECK_FUNC(re_search)
+ if test $ac_cv_header_regex_h = no || test $ac_cv_func_re_search = no; then
+ AC_MSG_WARN([GNU regex not found, falling back to the included version])
+ with_included_regex=yes
+ fi
+fi
+
+if test "x$with_included_regex" = xno; then
+ enable_regex_tests=no
+fi
+
+AM_CONDITIONAL(TEST_REGEX, test "x$enable_regex_tests" != xno)
+if test "x$with_included_regex" != xno; then
+ AC_CONFIG_LINKS(lib/regex.h:lib/regex_.h)
+ AC_LIBOBJ(regex)
+fi
+
+AC_ARG_ENABLE(html,
+[ --enable-html build HTML manual (default=no)], ,
+enable_html=no)
+
+AM_CONDITIONAL(BUILD_HTML, test "x$enable_html" != xno)
+
+: ${TEXI2HTML=texi2html -monolithic}
+AC_SUBST(TEXI2HTML)
+
+AC_MSG_CHECKING(how to build HTML documentation)
+if eval $am_missing_run makeinfo --help 2>&1 | grep .-html > /dev/null; then
+ AC_MSG_RESULT(with makeinfo)
+ enable_html=makeinfo
+else
+ if $TEXI2HTML --help 2>&1 | grep monolithic > /dev/null; then
+ AC_MSG_RESULT(with texi2html)
+ enable_html=texi2html
+ else
+ AC_MSG_RESULT(not built)
+ if test "x$enable_html" != xno; then
+ AC_MSG_ERROR(cannot build HTML documentation, install makeinfo 4.0 or texi2html)
+ fi
+ enable_html=no
+ fi
+fi
+
+AM_CONDITIONAL(MAKEINFO_HTML, test "x$enable_html" = xmakeinfo)
+AM_CONDITIONAL(TEXI2HTML_HTML, test "x$enable_html" = xtexi2html)
+
+
+ALL_LINGUAS="af ca cs da de el eo es et fi fr ga gl he hr hu id it ja ko nl pl pt_BR ro ru sk sl sr sv tr zh_CN"
+AM_GNU_GETTEXT_VERSION(0.11)
+AM_GNU_GETTEXT(, need-ngettext)
+
+AC_CONFIG_FILES([bootstrap.sh], chmod +x bootstrap.sh)
+AC_CONFIG_FILES([Makefile doc/Makefile \
+lib/Makefile sed/Makefile testsuite/Makefile \
+po/Makefile.in intl/Makefile])
+AC_OUTPUT
diff --git a/doc/Makefile.am b/doc/Makefile.am
new file mode 100644
index 0000000..d96c2fa
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,52 @@
+## Process this file with automake to produce Makefile.in
+info_TEXINFOS = sed.texi
+sed_TEXINFOS = config.texi version.texi
+dist_man_MANS = sed.1
+dist_noinst_DATA = sed.x sed-in.texi
+dist_noinst_SCRIPTS = groupify.sed
+CLEANFILES = sed.html
+TEXI2DVI = $(top_srcdir)/config/texi2dvi --expand
+HELP2MAN = $(top_srcdir)/config/help2man
+SED = $(top_builddir)/sed/sed
+
+# sed.texi is generated from sed-in.texi: groupify.sed wraps each comment
+# and the script lines following it in "@group...@end group" so they are
+# kept together, since maintaining those tags manually is a burden.  Inputs
+# and outputs are named via $(srcdir) so VPATH builds work; ditto for sed.1.
+sed.texi: sed-in.texi groupify.sed
+	sed -nf $(srcdir)/groupify.sed < $(srcdir)/sed-in.texi > $(srcdir)/sed.texi
+
+sed.1: $(top_srcdir)/sed/sed.c $(top_srcdir)/configure.ac sed.x
+	$(HELP2MAN) -p sed --include $(srcdir)/sed.x $(SED) > $(srcdir)/sed.1
+
+dist-hook:
+ touch $(distdir)/sed.1
+
+# Having a dependency on sed.info automatically makes
+# sed.html dependent on sed.texi and all the included
+# sources
+if MAKEINFO_HTML
+sed.html: sed.texi sed.info
+ cd $(srcdir) \
+ && $(MAKEINFO) --html `echo $< | sed 's,.*/,,'`
+
+html: sed.html
+
+.PHONY: html
+endif
+
+# These rules are used together with TEXI2HTML
+if TEXI2HTML_HTML
+sed.html: sed.texi sed.info
+ cd $(srcdir) \
+ && $(TEXI2HTML) `echo $< | sed 's,.*/,,'`
+
+html: sed.html
+
+.PHONY: html
+endif
+
+# This rule is used if --enable-html is passed
+if BUILD_HTML
+all: html
+endif
diff --git a/doc/config.texi b/doc/config.texi
new file mode 100644
index 0000000..aa5e35a
--- /dev/null
+++ b/doc/config.texi
@@ -0,0 +1,9 @@
+@dircategory Text creation and manipulation
+@direntry
+* sed: (sed). Stream EDitor.
+
+@end direntry
+
+@clear PERL
+@set SSEDEXT @acronym{GNU} extensions
+@set SSED @acronym{GNU} @command{sed}
diff --git a/doc/groupify.sed b/doc/groupify.sed
new file mode 100755
index 0000000..2430710
--- /dev/null
+++ b/doc/groupify.sed
@@ -0,0 +1,59 @@
+#! /bin/sed -nf
+# Script to add @group...@end group tags to sed-in.texi
+# so that comments are not separated from the instructions
+# that they refer to.
+
+# Step 1: search for the conventional "@c start----" comment
+1a\
+@c Do not edit this file!! It is automatically generated from sed-in.texi.
+p
+/^@c start-*$/! b
+
+# Step 2: loop until we find a @ command
+:a
+n
+p
+/^@/! ba
+
+# Step 3: process everything until a "@end" command
+
+# Step 3.1: Print the blank lines before the group. If we reach the "@end",
+# we go back to step 1.
+:b
+n
+/^@end/ {
+ p
+ b
+}
+/^[ ]*$/ {
+ p
+ bb
+}
+
+# Step 3.2: Add to hold space every line until an empty one or "@end"
+h
+:c
+n
+/^@end example/! {
+ /^[ ]*$/! {
+ H
+ bc
+ }
+}
+
+# Step 3.3: Working in hold space, add @group...@end group if there are
+# at least two lines. Then print the lines we processed and
+# switch back to pattern space.
+x
+/\n/ {
+ s/.*/@group\
+&\
+@end group/
+}
+p
+
+# Step 3.4: Switch back to pattern space, print the first blank line
+# and possibly go back to step 3.1
+x
+p
+/^@end/ !bb
diff --git a/doc/sed-in.texi b/doc/sed-in.texi
new file mode 100644
index 0000000..6cd4140
--- /dev/null
+++ b/doc/sed-in.texi
@@ -0,0 +1,4026 @@
+\input texinfo @c -*-texinfo-*-
+@c
+@c -- Stuff that needs adding: ----------------------------------------------
+@c (document the `;' command-separator)
+@c --------------------------------------------------------------------------
+@c Check for consistency: regexps in @code, text that they match in @samp.
+@c
+@c Tips:
+@c @command for command
+@c @samp for command fragments: @samp{cat -s}
+@c @code for sed commands and flags
+@c Use ``quote'' not `quote' or "quote".
+@c
+@c %**start of header
+@setfilename sed.info
+@settitle sed, a stream editor
+@c %**end of header
+
+@c @smallbook
+
+@include version.texi
+
+@c Combine indices.
+@syncodeindex ky cp
+@syncodeindex pg cp
+@syncodeindex tp cp
+
+@defcodeindex op
+@syncodeindex op fn
+
+@include config.texi
+
+@copying
+This file documents version @value{VERSION} of
+@value{SSED}, a stream editor.
+
+Copyright @copyright{} 1998, 1999, 2001, 2002, 2003, 2004 Free
+Software Foundation, Inc.
+
+This document is released under the terms of the @acronym{GNU} Free
+Documentation License as published by the Free Software Foundation;
+either version 1.1, or (at your option) any later version.
+
+You should have received a copy of the @acronym{GNU} Free Documentation
+License along with @value{SSED}; see the file @file{COPYING.DOC}.
+If not, write to the Free Software Foundation, 59 Temple Place - Suite
+330, Boston, MA 02111-1307, USA.
+
+There are no Cover Texts and no Invariant Sections; this text, along
+with its equivalent in the printed manual, constitutes the Title Page.
+@end copying
+
+@setchapternewpage off
+
+@titlepage
+@title @command{sed}, a stream editor
+@subtitle version @value{VERSION}, @value{UPDATED}
+@author by Ken Pizzini, Paolo Bonzini
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc.
+
+@insertcopying
+
+Published by the Free Software Foundation, @*
+59 Temple Place - Suite 330 @*
+Boston, MA 02111-1307, USA
+@end titlepage
+
+
+@node Top
+@top
+
+@ifnottex
+@insertcopying
+@end ifnottex
+
+@menu
+* Introduction:: Introduction
+* Invoking sed:: Invocation
+* sed Programs:: @command{sed} programs
+* Examples:: Some sample scripts
+* Limitations:: Limitations and (non-)limitations of @value{SSED}
+* Other Resources:: Other resources for learning about @command{sed}
+* Reporting Bugs:: Reporting bugs
+
+* Extended regexps:: @command{egrep}-style regular expressions
+@ifset PERL
+* Perl regexps:: Perl-style regular expressions
+@end ifset
+
+* Concept Index:: A menu with all the topics in this manual.
+* Command and Option Index:: A menu with all @command{sed} commands and
+ command-line options.
+
+@detailmenu
+--- The detailed node listing ---
+
+sed Programs:
+* Execution Cycle:: How @command{sed} works
+* Addresses:: Selecting lines with @command{sed}
+* Regular Expressions:: Overview of regular expression syntax
+* Common Commands:: Often used commands
+* The "s" Command:: @command{sed}'s Swiss Army Knife
+* Other Commands:: Less frequently used commands
+* Programming Commands:: Commands for @command{sed} gurus
+* Extended Commands:: Commands specific to @value{SSED}
+* Escapes:: Specifying special characters
+
+Examples:
+* Centering lines::
+* Increment a number::
+* Rename files to lower case::
+* Print bash environment::
+* Reverse chars of lines::
+* tac:: Reverse lines of files
+* cat -n:: Numbering lines
+* cat -b:: Numbering non-blank lines
+* wc -c:: Counting chars
+* wc -w:: Counting words
+* wc -l:: Counting lines
+* head:: Printing the first lines
+* tail:: Printing the last lines
+* uniq:: Make duplicate lines unique
+* uniq -d:: Print duplicated lines of input
+* uniq -u:: Remove all duplicated lines
+* cat -s:: Squeezing blank lines
+
+@ifset PERL
+Perl regexps:: Perl-style regular expressions
+* Backslash:: Introduces special sequences
+* Circumflex/dollar sign/period:: Behave specially with regard to new lines
+* Square brackets:: Are a bit different in strange cases
+* Options setting:: Toggle modifiers in the middle of a regexp
+* Non-capturing subpatterns:: Are not counted when backreferencing
+* Repetition:: Allows for non-greedy matching
+* Backreferences:: Allows for more than 10 back references
+* Assertions:: Allows for complex look ahead matches
+* Non-backtracking subpatterns:: Often gives more performance
+* Conditional subpatterns:: Allows if/then/else branches
+* Recursive patterns:: For example to match parentheses
+* Comments:: Because things can get complex...
+@end ifset
+
+@end detailmenu
+@end menu
+
+
+@node Introduction
+@chapter Introduction
+
+@cindex Stream editor
+@command{sed} is a stream editor.
+A stream editor is used to perform basic text
+transformations on an input stream
+(a file or input from a pipeline).
+While in some ways similar to an editor which
+permits scripted edits (such as @command{ed}),
+@command{sed} works by making only one pass over the
+input(s), and is consequently more efficient.
+But it is @command{sed}'s ability to filter text in a pipeline
+which particularly distinguishes it from other types of
+editors.
+
+
+@node Invoking sed
+@chapter Invocation
+
+Normally @command{sed} is invoked like this:
+
+@example
+sed SCRIPT INPUTFILE...
+@end example
+
+The full format for invoking @command{sed} is:
+
+@example
+sed OPTIONS... [SCRIPT] [INPUTFILE...]
+@end example
+
+If you do not specify @var{INPUTFILE}, or if @var{INPUTFILE} is @file{-},
+@command{sed} filters the contents of the standard input. The @var{script}
+is actually the first non-option parameter, which @command{sed} specially
+considers a script and not an input file if (and only if) none of the
+other @var{options} specifies a script to be executed, that is if neither
+of the @option{-e} and @option{-f} options is specified.
+
+@command{sed} may be invoked with the following command-line options:
+
+@table @code
+@item --version
+@opindex --version
+@cindex Version, printing
+Print out the version of @command{sed} that is being run and a copyright notice,
+then exit.
+
+@item --help
+@opindex --help
+@cindex Usage summary, printing
+Print a usage message briefly summarizing these command-line options
+and the bug-reporting address,
+then exit.
+
+@item -n
+@itemx --quiet
+@itemx --silent
+@opindex -n
+@opindex --quiet
+@opindex --silent
+@cindex Disabling autoprint, from command line
+By default, @command{sed} prints out the pattern space
+at the end of each cycle through the script.
+These options disable this automatic printing,
+and @command{sed} only produces output when explicitly told to
+via the @code{p} command.
+
+@item -i[@var{SUFFIX}]
+@itemx --in-place[=@var{SUFFIX}]
+@opindex -i
+@opindex --in-place
+@cindex In-place editing, activating
+@cindex @value{SSEDEXT}, in-place editing
+This option specifies that files are to be edited in-place.
+@value{SSED} does this by creating a temporary file and
+sending output to this file rather than to the standard
+output.@footnote{This applies to commands such as @code{=},
+@code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can
+still write to the standard output by using the @code{w}
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+or @code{W} commands together with the @file{/dev/stdout}
+special file}.
+
+This option implies @option{-s}.
+
+When the end of the file is reached, the temporary file is
+renamed to the output file's original name. The extension,
+if supplied, is used to modify the name of the old file
+before renaming the temporary file, thereby making a backup
+copy@footnote{Note that @value{SSED} creates the backup
+ file whether or not any output is actually changed.}.
+
+@cindex In-place editing, Perl-style backup file names
+This rule is followed: if the extension doesn't contain a @code{*},
+then it is appended to the end of the current filename as a
+suffix; if the extension does contain one or more @code{*}
+characters, then @emph{each} asterisk is replaced with the
+current filename. This allows you to add a prefix to the
+backup file, instead of (or in addition to) a suffix, or
+even to place backup copies of the original files into another
+directory (provided the directory already exists).
+
+If no extension is supplied, the original file is
+overwritten without making a backup.
+
+@item -l @var{N}
+@itemx --line-length=@var{N}
+@opindex -l
+@opindex --line-length
+@cindex Line length, setting
+Specify the default line-wrap length for the @code{l} command.
+A length of 0 (zero) means to never wrap long lines. If
+not specified, it is taken to be 70.
+
+@item --posix
+@cindex @value{SSEDEXT}, disabling
+@value{SSED} includes several extensions to @acronym{POSIX}
+sed. In order to simplify writing portable scripts, this
+option disables all the extensions that this manual documents,
+including additional commands.
+@cindex @code{POSIXLY_CORRECT} behavior, enabling
+Most of the extensions accept @command{sed} programs that
+are outside the syntax mandated by @acronym{POSIX}, but some
+of them (such as the behavior of the @command{N} command
+described in @ref{Reporting Bugs}) actually violate the
+standard. If you want to disable only the latter kind of
+extension, you can set the @code{POSIXLY_CORRECT} variable
+to a non-empty value.
+
+@item -r
+@itemx --regexp-extended
+@opindex -r
+@opindex --regexp-extended
+@cindex Extended regular expressions, choosing
+@cindex @acronym{GNU} extensions, extended regular expressions
+Use extended regular expressions rather than basic
+regular expressions. Extended regexps are those that
+@command{egrep} accepts; they can be clearer because they
+usually have fewer backslashes, but are a @acronym{GNU} extension
+and hence scripts that use them are not portable.
+@xref{Extended regexps, , Extended regular expressions}.
+
+@ifset PERL
+@item -R
+@itemx --regexp-perl
+@opindex -R
+@opindex --regexp-perl
+@cindex Perl-style regular expressions, choosing
+@cindex @value{SSEDEXT}, Perl-style regular expressions
+Use Perl-style regular expressions rather than basic
+regular expressions. Perl-style regexps are extremely
+powerful but are a @value{SSED} extension and hence scripts that
+use them are not portable. @xref{Perl regexps, ,
+Perl-style regular expressions}.
+@end ifset
+
+@item -s
+@itemx --separate
+@cindex Working on separate files
+By default, @command{sed} will consider the files specified on the
+command line as a single continuous long stream. This @value{SSED}
+extension allows the user to consider them as separate files:
+range addresses (such as @samp{/abc/,/def/}) are not allowed
+to span several files, line numbers are relative to the start
+of each file, @code{$} refers to the last line of each file,
+and files invoked from the @code{R} commands are rewound at the
+start of each file.
+
+@item -u
+@itemx --unbuffered
+@opindex -u
+@opindex --unbuffered
+@cindex Unbuffered I/O, choosing
+Buffer both input and output as minimally as practical.
+(This is particularly useful if the input is coming from
+the likes of @samp{tail -f}, and you wish to see the transformed
+output as soon as possible.)
+
+@item -e @var{script}
+@itemx --expression=@var{script}
+@opindex -e
+@opindex --expression
+@cindex Script, from command line
+Add the commands in @var{script} to the set of commands to be
+run while processing the input.
+
+@item -f @var{script-file}
+@itemx --file=@var{script-file}
+@opindex -f
+@opindex --file
+@cindex Script, from a file
+Add the commands contained in the file @var{script-file}
+to the set of commands to be run while processing the input.
+
+@end table
+
+If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file}
+options are given on the command-line,
+then the first non-option argument on the command line is
+taken to be the @var{script} to be executed.
+
+@cindex Files to be processed as input
+If any command-line parameters remain after processing the above,
+these parameters are interpreted as the names of input files to
+be processed.
+@cindex Standard input, processing as input
+A file name of @samp{-} refers to the standard input stream.
+The standard input will be processed if no file names are specified.
+
+
+@node sed Programs
+@chapter @command{sed} Programs
+
+@cindex @command{sed} program structure
+@cindex Script structure
+A @command{sed} program consists of one or more @command{sed} commands,
+passed in by one or more of the
+@option{-e}, @option{-f}, @option{--expression}, and @option{--file}
+options, or the first non-option argument if zero of these
+options are used.
+This document will refer to ``the'' @command{sed} script;
+this is understood to mean the in-order catenation
+of all of the @var{script}s and @var{script-file}s passed in.
+
+Each @code{sed} command consists of an optional address or
+address range, followed by a one-character command name
+and any additional command-specific code.
+
+@menu
+* Execution Cycle:: How @command{sed} works
+* Addresses:: Selecting lines with @command{sed}
+* Regular Expressions:: Overview of regular expression syntax
+* Common Commands:: Often used commands
+* The "s" Command:: @command{sed}'s Swiss Army Knife
+* Other Commands:: Less frequently used commands
+* Programming Commands:: Commands for @command{sed} gurus
+* Extended Commands:: Commands specific to @value{SSED}
+* Escapes:: Specifying special characters
+@end menu
+
+
+@node Execution Cycle
+@section How @command{sed} Works
+
+@cindex Buffer spaces, pattern and hold
+@cindex Spaces, pattern and hold
+@cindex Pattern space, definition
+@cindex Hold space, definition
+@command{sed} maintains two data buffers: the active @emph{pattern} space,
+and the auxiliary @emph{hold} space. Both are initially empty.
+
+@command{sed} operates by performing the following cycle on each
+line of input: first, @command{sed} reads one line from the input
+stream, removes any trailing newline, and places it in the pattern space.
+Then commands are executed; each command can have an address associated
+with it: addresses are a kind of condition code, and a command is only
+executed if the condition is verified before the command is to be
+executed.
+
+When the end of the script is reached, unless the @option{-n} option
+is in use, the contents of pattern space are printed out to the output
+stream, adding back the trailing newline if it was removed.@footnote{Actually,
+ if @command{sed} prints a line without the terminating newline, it will
+ nevertheless print the missing newline as soon as more text is sent to
+ the same output stream, which gives the ``least expected surprise''
+ even though it does not make commands like @samp{sed -n p} exactly
+ identical to @command{cat}.} Then the next cycle starts for the next
+input line.
+
+Unless special commands (like @samp{D}) are used, the pattern space is
+deleted between two cycles. The hold space, on the other hand, keeps
+its data between cycles (see commands @samp{h}, @samp{H}, @samp{x},
+@samp{g}, @samp{G} to move data between both buffers).
+
+
+@node Addresses
+@section Selecting lines with @command{sed}
+@cindex Addresses, in @command{sed} scripts
+@cindex Line selection
+@cindex Selecting lines to process
+
+Addresses in a @command{sed} script can be in any of the following forms:
+@table @code
+@item @var{number}
+@cindex Address, numeric
+@cindex Line, selecting by number
+Specifying a line number will match only that line in the input.
+(Note that @command{sed} counts lines continuously across all input files
+unless @option{-i} or @option{-s} options are specified.)
+
+@item @var{first}~@var{step}
+@cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses
+This @acronym{GNU} extension matches every @var{step}th line
+starting with line @var{first}.
+In particular, lines will be selected when there exists
+a non-negative @var{n} such that the current line-number equals
+@var{first} + (@var{n} * @var{step}).
+Thus, to select the odd-numbered lines,
+one would use @code{1~2};
+to pick every third line starting with the second, @samp{2~3} would be used;
+to pick every fifth line starting with the tenth, use @samp{10~5};
+and @samp{50~0} is just an obscure way of saying @code{50}.
+
+@item $
+@cindex Address, last line
+@cindex Last line, selecting
+@cindex Line, selecting last
+This address matches the last line of the last file of input, or
+the last line of each file when the @option{-i} or @option{-s} options
+are specified.
+
+@item /@var{regexp}/
+@cindex Address, as a regular expression
+@cindex Line, selecting by regular expression match
+This will select any line which matches the regular expression @var{regexp}.
+If @var{regexp} itself includes any @code{/} characters,
+each must be escaped by a backslash (@code{\}).
+
+@cindex empty regular expression
+@cindex @value{SSEDEXT}, modifiers and the empty regular expression
+The empty regular expression @samp{//} repeats the last regular
+expression match (the same holds if the empty regular expression is
+passed to the @code{s} command). Note that modifiers to regular expressions
+are evaluated when the regular expression is compiled, thus it is invalid to
+specify them together with the empty regular expression.
+
+@item \%@var{regexp}%
+(The @code{%} may be replaced by any other single character.)
+
+@cindex Slash character, in regular expressions
+This also matches the regular expression @var{regexp},
+but allows one to use a different delimiter than @code{/}.
+This is particularly useful if the @var{regexp} itself contains
+a lot of slashes, since it avoids the tedious escaping of every @code{/}.
+If @var{regexp} itself includes any delimiter characters,
+each must be escaped by a backslash (@code{\}).
+
+@item /@var{regexp}/I
+@itemx \%@var{regexp}%I
+@cindex @acronym{GNU} extensions, @code{I} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, case-insensitive
+@end ifset
+The @code{I} modifier to regular-expression matching is a @acronym{GNU}
+extension which causes the @var{regexp} to be matched in
+a case-insensitive manner.
+
+@item /@var{regexp}/M
+@itemx \%@var{regexp}%M
+@cindex @value{SSEDEXT}, @code{M} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, multiline
+@end ifset
+The @code{M} modifier to regular-expression matching is a @value{SSED}
+extension which causes @code{^} and @code{$} to match respectively
+(in addition to the normal behavior) the empty string after a newline,
+and the empty string before a newline. There are special character
+sequences
+@ifset PERL
+(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'}
+in basic or extended regular expression modes)
+@end ifset
+@ifclear PERL
+(@code{\`} and @code{\'})
+@end ifclear
+which always match the beginning or the end of the buffer.
+@code{M} stands for @cite{multi-line}.
+
+@ifset PERL
+@item /@var{regexp}/S
+@itemx \%@var{regexp}%S
+@cindex @value{SSEDEXT}, @code{S} modifier
+@cindex Perl-style regular expressions, single line
+The @code{S} modifier to regular-expression matching is only valid
+in Perl mode and specifies that the dot character (@code{.}) will
+match the newline character too. @code{S} stands for @cite{single-line}.
+@end ifset
+
+@ifset PERL
+@item /@var{regexp}/X
+@itemx \%@var{regexp}%X
+@cindex @value{SSEDEXT}, @code{X} modifier
+@cindex Perl-style regular expressions, extended
+The @code{X} modifier to regular-expression matching is also
+valid in Perl mode only. If it is used, whitespace in the
+pattern (other than in a character class) and
+characters between a @kbd{#} outside a character class and the
+next newline character are ignored. An escaping backslash
+can be used to include a whitespace or @kbd{#} character as part
+of the pattern.
+@end ifset
+@end table
+
+If no addresses are given, then all lines are matched;
+if one address is given, then only lines matching that
+address are matched.
+
+@cindex Range of lines
+@cindex Several lines, selecting
+An address range can be specified by specifying two addresses
+separated by a comma (@code{,}). An address range matches lines
+starting from where the first address matches, and continues
+until the second address matches (inclusively).
+
+If the second address is a @var{regexp}, then checking for the
+ending match will start with the line @emph{following} the
+line which matched the first address: a range will always
+span at least two lines (except of course if the input stream
+ends).
+
+If the second address is a @var{number} less than (or equal to)
+the line matching the first address, then only the one line is
+matched.
+
+@cindex Special addressing forms
+@cindex Range with start address of zero
+@cindex Zero, as range start address
+@cindex @var{addr1},+N
+@cindex @var{addr1},~N
+@cindex @acronym{GNU} extensions, special two-address forms
+@cindex @acronym{GNU} extensions, @code{0} address
+@cindex @acronym{GNU} extensions, 0,@var{addr2} addressing
+@cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing
+@cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing
+@value{SSED} also supports some special two-address forms; all these
+are @acronym{GNU} extensions:
+@table @code
+@item 0,/@var{regexp}/
+A line number of @code{0} can be used in an address specification like
+@code{0,/@var{regexp}/} so that @command{sed} will try to match
+@var{regexp} in the first input line too. In other words,
+@code{0,/@var{regexp}/} is similar to @code{1,/@var{regexp}/},
+except that if @var{addr2} matches the very first line of input the
+@code{0,/@var{regexp}/} form will consider it to end the range, whereas
+the @code{1,/@var{regexp}/} form will match the beginning of its range and
+hence make the range span up to the @emph{second} occurrence of the
+regular expression.
+
+Note that this is the only place where the @code{0} address makes
+sense; there is no 0-th line and commands which are given the @code{0}
+address in any other way will give an error.
+
+@item @var{addr1},+@var{N}
+Matches @var{addr1} and the @var{N} lines following @var{addr1}.
+
+@item @var{addr1},~@var{N}
+Matches @var{addr1} and the lines following @var{addr1}
+until the next line whose input line number is a multiple of @var{N}.
+@end table
+
+@cindex Excluding lines
+@cindex Selecting non-matching lines
+Appending the @code{!} character to the end of an address
+specification negates the sense of the match.
+That is, if the @code{!} character follows an address range,
+then only lines which do @emph{not} match the address range
+will be selected.
+This also works for singleton addresses,
+and, perhaps perversely, for the null address.
+
+
+@node Regular Expressions
+@section Overview of Regular Expression Syntax
+
+To know how to use @command{sed}, people should understand regular
+expressions (@dfn{regexp} for short). A regular expression
+is a pattern that is matched against a
+subject string from left to right. Most characters are
+@dfn{ordinary}: they stand for
+themselves in a pattern, and match the corresponding characters
+in the subject. As a trivial example, the pattern
+
+@example
+ The quick brown fox
+@end example
+
+@noindent
+matches a portion of a subject string that is identical to
+itself. The power of regular expressions comes from the
+ability to include alternatives and repetitions in the pattern.
+These are encoded in the pattern by the use of @dfn{special characters},
+which do not stand for themselves but instead
+are interpreted in some special way. Here is a brief description
+of regular expression syntax as used in @command{sed}.
+
+@table @code
+@item @var{char}
+A single ordinary character matches itself.
+
+@item *
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Matches a sequence of zero or more instances of matches for the
+preceding regular expression, which must be an ordinary character, a
+special character preceded by @code{\}, a @code{.}, a grouped regexp
+(see below), or a bracket expression. As a @acronym{GNU} extension, a
+postfixed regular expression can also be followed by @code{*}; for
+example, @code{a**} is equivalent to @code{a*}. @acronym{POSIX}
+1003.1-2001 says that @code{*} stands for itself when it appears at
+the start of a regular expression or subexpression, but many
+non-@acronym{GNU} implementations do not support this and portable
+scripts should instead use @code{\*} in these contexts.
+
+@item \+
+@cindex @acronym{GNU} extensions, to basic regular expressions
+As @code{*}, but matches one or more. It is a @acronym{GNU} extension.
+
+@item \?
+@cindex @acronym{GNU} extensions, to basic regular expressions
+As @code{*}, but only matches zero or one. It is a @acronym{GNU} extension.
+
+@item \@{@var{i}\@}
+As @code{*}, but matches exactly @var{i} sequences (@var{i} is a
+decimal integer; for portability, keep it between 0 and 255
+inclusive).
+
+@item \@{@var{i},@var{j}\@}
+Matches between @var{i} and @var{j}, inclusive, sequences.
+
+@item \@{@var{i},\@}
+Matches more than or equal to @var{i} sequences.
+
+@item \(@var{regexp}\)
+Groups the inner @var{regexp} as a whole; this is used to:
+
+@itemize @bullet
+@item
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Apply postfix operators, like @code{\(abcd\)*}:
+this will search for zero or more whole sequences
+of @samp{abcd}, while @code{abcd*} would search
+for @samp{abc} followed by zero or more occurrences
+of @samp{d}. Note that support for @code{\(abcd\)*} is
+required by @acronym{POSIX} 1003.1-2001, but many non-@acronym{GNU}
+implementations do not support it and hence it is not universally
+portable.
+
+@item
+Use back references (see below).
+@end itemize
+
+@item .
+Matches any character, including newline.
+
+@item ^
+Matches the null string at beginning of line, i.e. what
+appears after the circumflex must appear at the
+beginning of line. @code{^#include} will match only
+lines where @samp{#include} is the first thing on line---if
+there are spaces before, for example, the match fails.
+@code{^} acts as a special character only at the beginning
+of the regular expression or subexpression (that is,
+after @code{\(} or @code{\|}). Portable scripts should avoid
+@code{^} at the beginning of a subexpression, though, as
+@acronym{POSIX} allows implementations that treat @code{^} as
+an ordinary character in that context.
+
+
+@item $
+It is the same as @code{^}, but refers to end of line.
+@code{$} also acts as a special character only at the end
+of the regular expression or subexpression (that is, before @code{\)}
+or @code{\|}), and its use at the end of a subexpression is not
+portable.
+
+
+@item [@var{list}]
+@itemx [^@var{list}]
+Matches any single character in @var{list}: for example,
+@code{[aeiou]} matches all vowels. A list may include
+sequences like @code{@var{char1}-@var{char2}}, which
+matches any character between (inclusive) @var{char1}
+and @var{char2}.
+
+A leading @code{^} reverses the meaning of @var{list}, so that
+it matches any single character @emph{not} in @var{list}. To include
+@code{]} in the list, make it the first character (after
+the @code{^} if needed); to include @code{-} in the list,
+make it the first or last; to include @code{^} put
+it after the first character.
+
+@cindex @code{POSIXLY_CORRECT} behavior, bracket expressions
+The characters @code{$}, @code{*}, @code{.}, @code{[}, and @code{\}
+are normally not special within @var{list}. For example, @code{[\*]}
+matches either @samp{\} or @samp{*}, because the @code{\} is not
+special here. However, strings like @code{[.ch.]}, @code{[=a=]}, and
+@code{[:space:]} are special within @var{list} and represent collating
+symbols, equivalence classes, and character classes, respectively, and
+@code{[} is therefore special within @var{list} when it is followed by
+@code{.}, @code{=}, or @code{:}. Also, when not in
+@env{POSIXLY_CORRECT} mode, special escapes like @code{\n} and
+@code{\t} are recognized within @var{list}. @xref{Escapes}.
+
+@item @var{regexp1}\|@var{regexp2}
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Matches either @var{regexp1} or @var{regexp2}. Use
+parentheses to use complex alternative regular expressions.
+The matching process tries each alternative in turn, from
+left to right, and the first one that succeeds is used.
+It is a @acronym{GNU} extension.
+
+@item @var{regexp1}@var{regexp2}
+Matches the concatenation of @var{regexp1} and @var{regexp2}.
+Concatenation binds more tightly than @code{\|}, @code{^}, and
+@code{$}, but less tightly than the other regular expression
+operators.
+
+@item \@var{digit}
+Matches the @var{digit}-th @code{\(@dots{}\)} parenthesized
+subexpression in the regular expression. This is called a @dfn{back
+reference}. Subexpressions are implicitly numbered by counting
+occurrences of @code{\(} left-to-right.
+
+@item \n
+Matches the newline character.
+
+@item \@var{char}
+Matches @var{char}, where @var{char} is one of @code{$},
+@code{*}, @code{.}, @code{[}, @code{\}, or @code{^}.
+Note that the only C-like
+backslash sequences that you can portably assume to be
+interpreted are @code{\n} and @code{\\}; in particular
+@code{\t} is not portable, and matches a @samp{t} under most
+implementations of @command{sed}, rather than a tab character.
+
+@end table
+
+@cindex Greedy regular expression matching
+Note that the regular expression matcher is greedy, i.e., matches
+are attempted from left to right and, if two or more matches are
+possible starting at the same character, it selects the longest.
+
+@noindent
+Examples:
+@table @samp
+@item abcdef
+Matches @samp{abcdef}.
+
+@item a*b
+Matches zero or more @samp{a}s followed by a single
+@samp{b}. For example, @samp{b} or @samp{aaaaab}.
+
+@item a\?b
+Matches @samp{b} or @samp{ab}.
+
+@item a\+b\+
+Matches one or more @samp{a}s followed by one or more
+@samp{b}s: @samp{ab} is the shortest possible match, but
+other examples are @samp{aaaab} or @samp{abbbbb} or
+@samp{aaaaaabbbbbbb}.
+
+@item .*
+@itemx .\+
+These two both match all the characters in a string;
+however, the first matches every string (including the empty
+string), while the second matches only strings containing
+at least one character.
+
+@item ^main.*(.*)
+This matches a string starting with @samp{main},
+followed by an opening and closing
+parenthesis. The @samp{n}, @samp{(} and @samp{)} need not
+be adjacent.
+
+@item ^#
+This matches a string beginning with @samp{#}.
+
+@item \\$
+This matches a string ending with a single backslash. The
+regexp contains two backslashes for escaping.
+
+@item \$
+Instead, this matches a string consisting of a single dollar sign,
+because it is escaped.
+
+@item [a-zA-Z0-9]
+In the C locale, this matches any @acronym{ASCII} letters or digits.
+
+@item [^ @kbd{tab}]\+
+(Here @kbd{tab} stands for a single tab character.)
+This matches a string of one or more
+characters, none of which is a space or a tab.
+Usually this means a word.
+
+@item ^\(.*\)\n\1$
+This matches a string consisting of two equal substrings separated by
+a newline.
+
+@item .\@{9\@}A$
+This matches nine characters followed by an @samp{A}.
+
+@item ^.\@{15\@}A
+This matches the start of a string that contains 16 characters,
+the last of which is an @samp{A}.
+
+@end table
+
+
+
+@node Common Commands
+@section Often-Used Commands
+
+If you use @command{sed} at all, you will quite likely want to know
+these commands.
+
+@table @code
+@item #
+[No addresses allowed.]
+
+@findex # (comments)
+@cindex Comments, in scripts
+The @code{#} character begins a comment;
+the comment continues until the next newline.
+
+@cindex Portability, comments
+If you are concerned about portability, be aware that
+some implementations of @command{sed} (which are not @sc{posix}
+conformant) may only support a single one-line comment,
+and then only when the very first character of the script is a @code{#}.
+
+@findex -n, forcing from within a script
+@cindex Caveat --- #n on first line
+Warning: if the first two characters of the @command{sed} script
+are @code{#n}, then the @option{-n} (no-autoprint) option is forced.
+If you want to put a comment in the first line of your script
+and that comment begins with the letter @samp{n}
+and you do not want this behavior,
+then be sure to either use a capital @samp{N},
+or place at least one space before the @samp{n}.
+
+@item q [@var{exit-code}]
+This command only accepts a single address.
+
+@findex q (quit) command
+@cindex @value{SSEDEXT}, returning an exit code
+@cindex Quitting
+Exit @command{sed} without processing any more commands or input.
+Note that the current pattern space is printed if auto-print is
+not disabled with the @option{-n} option.  The ability to return
+an exit code from the @command{sed} script is a @value{SSED} extension.
+
+@item d
+@findex d (delete) command
+@cindex Text, deleting
+Delete the pattern space;
+immediately start next cycle.
+
+@item p
+@findex p (print) command
+@cindex Text, printing
+Print out the pattern space (to the standard output).
+This command is usually only used in conjunction with the @option{-n}
+command-line option.
+
+@item n
+@findex n (next-line) command
+@cindex Next input line, replace pattern space with
+@cindex Read next input line
+If auto-print is not disabled, print the pattern space,
+then, regardless, replace the pattern space with the next line of input.
+If there is no more input then @command{sed} exits without processing
+any more commands.
+
+@item @{ @var{commands} @}
+@findex @{@} command grouping
+@cindex Grouping commands
+@cindex Command groups
+A group of commands may be enclosed between
+@code{@{} and @code{@}} characters.
+This is particularly useful when you want a group of commands
+to be triggered by a single address (or address-range) match.
+
+@end table
+
+@node The "s" Command
+@section The @code{s} Command
+
+The syntax of the @code{s} (as in substitute) command is
+@samp{s/@var{regexp}/@var{replacement}/@var{flags}}. The @code{/}
+characters may be uniformly replaced by any other single
+character within any given @code{s} command. The @code{/}
+character (or whatever other character is used in its stead)
+can appear in the @var{regexp} or @var{replacement}
+only if it is preceded by a @code{\} character.
+
+The @code{s} command is probably the most important in @command{sed}
+and has a lot of different options. Its basic concept is simple:
+the @code{s} command attempts to match the pattern
+space against the supplied @var{regexp}; if the match is
+successful, then that portion of the pattern
+space which was matched is replaced with @var{replacement}.
+
+@cindex Backreferences, in regular expressions
+@cindex Parenthesized substrings
+The @var{replacement} can contain @code{\@var{n}} (@var{n} being
+a number from 1 to 9, inclusive) references, which refer to
+the portion of the match which is contained between the @var{n}th
+@code{\(} and its matching @code{\)}.
+Also, the @var{replacement} can contain unescaped @code{&}
+characters which reference the whole matched portion
+of the pattern space.
+@cindex @value{SSEDEXT}, case modifiers in @code{s} commands
+Finally, as a @value{SSED} extension, you can include a
+special sequence made of a backslash and one of the letters
+@code{L}, @code{l}, @code{U}, @code{u}, or @code{E}.
+The meaning is as follows:
+
+@table @code
+@item \L
+Turn the replacement
+to lowercase until a @code{\U} or @code{\E} is found,
+
+@item \l
+Turn the
+next character to lowercase,
+
+@item \U
+Turn the replacement to uppercase
+until a @code{\L} or @code{\E} is found,
+
+@item \u
+Turn the next character
+to uppercase,
+
+@item \E
+Stop case conversion started by @code{\L} or @code{\U}.
+@end table
+
+To include a literal @code{\}, @code{&}, or newline in the final
+replacement, be sure to precede the desired @code{\}, @code{&},
+or newline in the @var{replacement} with a @code{\}.
+
+@findex s command, option flags
+@cindex Substitution of text, options
+The @code{s} command can be followed by zero or more of the
+following @var{flags}:
+
+@table @code
+@item g
+@cindex Global substitution
+@cindex Replacing all text matching regexp in a line
+Apply the replacement to @emph{all} matches to the @var{regexp},
+not just the first.
+
+@item @var{number}
+@cindex Replacing only @var{n}th match of regexp in a line
+Only replace the @var{number}th match of the @var{regexp}.
+
+@cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command
+@cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command
+Note: the @sc{posix} standard does not specify what should happen
+when you mix the @code{g} and @var{number} modifiers,
+and currently there is no widely agreed upon meaning
+across @command{sed} implementations.
+For @value{SSED}, the interaction is defined to be:
+ignore matches before the @var{number}th,
+and then match and replace all matches from
+the @var{number}th on.
+
+@item p
+@cindex Text, printing after substitution
+If the substitution was made, then print the new pattern space.
+
+Note: when both the @code{p} and @code{e} options are specified,
+the relative ordering of the two produces very different results.
+In general, @code{ep} (evaluate then print) is what you want,
+but operating the other way round can be useful for debugging.
+For this reason, the current version of @value{SSED} interprets
+specially the presence of @code{p} options both before and after
+@code{e}, printing the pattern space before and after evaluation,
+while in general flags for the @code{s} command show their
+effect just once. This behavior, although documented, might
+change in future versions.
+
+@item w @var{file-name}
+@cindex Text, writing to a file after substitution
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+@cindex @value{SSEDEXT}, @file{/dev/stderr} file
+If the substitution was made, then write out the result to the named file.
+As a @value{SSED} extension, two special values of @var{file-name} are
+supported: @file{/dev/stderr}, which writes the result to the standard
+error, and @file{/dev/stdout}, which writes to the standard
+output.@footnote{This is equivalent to @code{p} unless the @option{-i}
+option is being used.}
+
+@item e
+@cindex Evaluate Bourne-shell commands, after substitution
+@cindex Subprocesses
+@cindex @value{SSEDEXT}, evaluating Bourne-shell commands
+@cindex @value{SSEDEXT}, subprocesses
+This command allows one to pipe input from a shell command
+into pattern space. If a substitution was made, the command
+that is found in pattern space is executed and pattern space
+is replaced with its output. A trailing newline is suppressed;
+results are undefined if the command to be executed contains
+a @sc{nul} character. This is a @value{SSED} extension.
+
+@item I
+@itemx i
+@cindex @acronym{GNU} extensions, @code{I} modifier
+@cindex Case-insensitive matching
+@ifset PERL
+@cindex Perl-style regular expressions, case-insensitive
+@end ifset
+The @code{I} modifier to regular-expression matching is a @acronym{GNU}
+extension which makes @command{sed} match @var{regexp} in a
+case-insensitive manner.
+
+@item M
+@itemx m
+@cindex @value{SSEDEXT}, @code{M} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, multiline
+@end ifset
+The @code{M} modifier to regular-expression matching is a @value{SSED}
+extension which causes @code{^} and @code{$} to match respectively
+(in addition to the normal behavior) the empty string after a newline,
+and the empty string before a newline. There are special character
+sequences
+@ifset PERL
+(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'}
+in basic or extended regular expression modes)
+@end ifset
+@ifclear PERL
+(@code{\`} and @code{\'})
+@end ifclear
+which always match the beginning or the end of the buffer.
+@code{M} stands for @cite{multi-line}.
+
+@ifset PERL
+@item S
+@itemx s
+@cindex @value{SSEDEXT}, @code{S} modifier
+@cindex Perl-style regular expressions, single line
+The @code{S} modifier to regular-expression matching is only valid
+in Perl mode and specifies that the dot character (@code{.}) will
+match the newline character too. @code{S} stands for @cite{single-line}.
+@end ifset
+
+@ifset PERL
+@item X
+@itemx x
+@cindex @value{SSEDEXT}, @code{X} modifier
+@cindex Perl-style regular expressions, extended
+The @code{X} modifier to regular-expression matching is also
+valid in Perl mode only. If it is used, whitespace in the
+pattern (other than in a character class) and
+characters between a @kbd{#} outside a character class and the
+next newline character are ignored. An escaping backslash
+can be used to include a whitespace or @kbd{#} character as part
+of the pattern.
+@end ifset
+@end table
+
+
+@node Other Commands
+@section Less Frequently-Used Commands
+
+Though perhaps less frequently used than those in the previous
+section, some very small yet useful @command{sed} scripts can be built with
+these commands.
+
+@table @code
+@item y/@var{source-chars}/@var{dest-chars}/
+(The @code{/} characters may be uniformly replaced by
+any other single character within any given @code{y} command.)
+
+@findex y (transliterate) command
+@cindex Transliteration
+Transliterate any characters in the pattern space which match
+any of the @var{source-chars} with the corresponding character
+in @var{dest-chars}.
+
+Instances of the @code{/} (or whatever other character is used in its stead),
+@code{\}, or newlines can appear in the @var{source-chars} or @var{dest-chars}
+lists, provided that each instance is escaped by a @code{\}.
+The @var{source-chars} and @var{dest-chars} lists @emph{must}
+contain the same number of characters (after de-escaping).
+
+@item a\
+@itemx @var{text}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex a (append text lines) command
+@cindex Appending text after a line
+@cindex Text, appending
+Queue the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output)
+to be output at the end of the current cycle,
+or when the next input line is read.
+
+Escape sequences in @var{text} are processed, so you should
+use @code{\\} in @var{text} to print a single backslash.
+
+As a @acronym{GNU} extension, if between the @code{a} and the newline there is
+other than a whitespace-@code{\} sequence, then the text of this line,
+starting at the first non-whitespace character after the @code{a},
+is taken as the first line of the @var{text} block.
+(This enables a simplification in scripting a one-line add.)
+This extension also works with the @code{i} and @code{c} commands.
+
+@item i\
+@itemx @var{text}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex i (insert text lines) command
+@cindex Inserting text before a line
+@cindex Text, insertion
+Immediately output the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output).
+
+@item c\
+@itemx @var{text}
+@findex c (change to text lines) command
+@cindex Replacing selected lines with other text
+Delete the lines matching the address or address-range,
+and output the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output)
+in place of the last line
+(or in place of each line, if no addresses were specified).
+A new cycle is started after this command is done,
+since the pattern space will have been deleted.
+
+@item =
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex = (print line number) command
+@cindex Printing line number
+@cindex Line number, printing
+Print out the current input line number (with a trailing newline).
+
+@item l @var{n}
+@findex l (list unambiguously) command
+@cindex List pattern space
+@cindex Printing text unambiguously
+@cindex Line length, setting
+@cindex @value{SSEDEXT}, setting line length
+Print the pattern space in an unambiguous form:
+non-printable characters (and the @code{\} character)
+are printed in C-style escaped form; long lines are split,
+with a trailing @code{\} character to indicate the split;
+the end of each line is marked with a @code{$}.
+
+@var{n} specifies the desired line-wrap length;
+a length of 0 (zero) means to never wrap long lines. If omitted,
+the default as specified on the command line is used. The @var{n}
+parameter is a @value{SSED} extension.
+
+@item r @var{filename}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex r (read file) command
+@cindex Read text from a file
+@cindex @value{SSEDEXT}, @file{/dev/stdin} file
+Queue the contents of @var{filename} to be read and
+inserted into the output stream at the end of the current cycle,
+or when the next input line is read.
+Note that if @var{filename} cannot be read, it is treated as
+if it were an empty file, without any error indication.
+
+As a @value{SSED} extension, the special value @file{/dev/stdin}
+is supported for the file name, which reads the contents of the
+standard input.
+
+@item w @var{filename}
+@findex w (write file) command
+@cindex Write to a file
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+@cindex @value{SSEDEXT}, @file{/dev/stderr} file
+Write the pattern space to @var{filename}.
+As a @value{SSED} extension, two special values of @var{filename} are
+supported: @file{/dev/stderr}, which writes the result to the standard
+error, and @file{/dev/stdout}, which writes to the standard
+output.@footnote{This is equivalent to @code{p} unless the @option{-i}
+option is being used.}
+
+The file will be created (or truncated) before the
+first input line is read; all @code{w} commands
+(including instances of the @code{w} flag on successful @code{s} commands)
+which refer to the same @var{filename} are output without
+closing and reopening the file.
+
+@item D
+@findex D (delete first line) command
+@cindex Delete first line from pattern space
+Delete text in the pattern space up to the first newline.
+If any text is left, restart cycle with the resultant
+pattern space (without reading a new line of input),
+otherwise start a normal new cycle.
+
+@item N
+@findex N (append Next line) command
+@cindex Next input line, append to pattern space
+@cindex Append next input line to pattern space
+Add a newline to the pattern space,
+then append the next line of input to the pattern space.
+If there is no more input then @command{sed} exits without processing
+any more commands.
+
+@item P
+@findex P (print first line) command
+@cindex Print first line from pattern space
+Print out the portion of the pattern space up to the first newline.
+
+@item h
+@findex h (hold) command
+@cindex Copy pattern space into hold space
+@cindex Replace hold space with copy of pattern space
+@cindex Hold space, copying pattern space into
+Replace the contents of the hold space with the contents of the pattern space.
+
+@item H
+@findex H (append Hold) command
+@cindex Append pattern space to hold space
+@cindex Hold space, appending from pattern space
+Append a newline to the contents of the hold space,
+and then append the contents of the pattern space to that of the hold space.
+
+@item g
+@findex g (get) command
+@cindex Copy hold space into pattern space
+@cindex Replace pattern space with copy of hold space
+@cindex Hold space, copy into pattern space
+Replace the contents of the pattern space with the contents of the hold space.
+
+@item G
+@findex G (appending Get) command
+@cindex Append hold space to pattern space
+@cindex Hold space, appending to pattern space
+Append a newline to the contents of the pattern space,
+and then append the contents of the hold space to that of the pattern space.
+
+@item x
+@findex x (eXchange) command
+@cindex Exchange hold space with pattern space
+@cindex Hold space, exchange with pattern space
+Exchange the contents of the hold and pattern spaces.
+
+@end table
+
+
+@node Programming Commands
+@section Commands for @command{sed} gurus
+
+In most cases, use of these commands indicates that you are
+probably better off programming in something like @command{awk}
+or Perl. But occasionally one is committed to sticking
+with @command{sed}, and these commands can enable one to write
+quite convoluted scripts.
+
+@cindex Flow of control in scripts
+@table @code
+@item : @var{label}
+[No addresses allowed.]
+
+@findex : (label) command
+@cindex Labels, in scripts
+Specify the location of @var{label} for branch commands.
+In all other respects, a no-op.
+
+@item b @var{label}
+@findex b (branch) command
+@cindex Branch to a label, unconditionally
+@cindex Goto, in scripts
+Unconditionally branch to @var{label}.
+The @var{label} may be omitted, in which case the next cycle is started.
+
+@item t @var{label}
+@findex t (test and branch if successful) command
+@cindex Branch to a label, if @code{s///} succeeded
+@cindex Conditional branch
+Branch to @var{label} only if there has been a successful @code{s}ubstitution
+since the last input line was read or conditional branch was taken.
+The @var{label} may be omitted, in which case the next cycle is started.
+
+@end table
+
+@node Extended Commands
+@section Commands Specific to @value{SSED}
+
+These commands are specific to @value{SSED}, so you
+must use them with care and only when you are sure that
+hindering portability is not evil. They allow you to check
+for @value{SSED} extensions or to do tasks that are required
+quite often, yet are unsupported by standard @command{sed}s.
+
+@table @code
+@item e [@var{command}]
+@findex e (evaluate) command
+@cindex Evaluate Bourne-shell commands
+@cindex Subprocesses
+@cindex @value{SSEDEXT}, evaluating Bourne-shell commands
+@cindex @value{SSEDEXT}, subprocesses
+This command allows one to pipe input from a shell command
+into pattern space. Without parameters, the @code{e} command
+executes the command that is found in pattern space and
+replaces the pattern space with the output; a trailing newline
+is suppressed.
+
+If a parameter is specified, instead, the @code{e} command
+interprets it as a command and sends its output to the output stream
+(like @code{r} does). The command can run across multiple
+lines, all but the last ending with a backslash.
+
+In both cases, the results are undefined if the command to be
+executed contains a @sc{nul} character.
+
+@item L @var{n}
+@findex L (fLow paragraphs) command
+@cindex Reformat pattern space
+@cindex Reformatting paragraphs
+@cindex @value{SSEDEXT}, reformatting paragraphs
+@cindex @value{SSEDEXT}, @code{L} command
+This @value{SSED} extension fills and joins lines in pattern space
+to produce output lines of (at most) @var{n} characters, like
+@code{fmt} does; if @var{n} is omitted, the default as specified
+on the command line is used. This command is considered a failed
+experiment and unless there is enough demand (which seems unlikely)
+will be removed in future versions.
+
+@ignore
+Blank lines, spaces between words, and indentation are
+preserved in the output; successive input lines with different
+indentation are not joined; tabs are expanded to 8 columns.
+
+If the pattern space contains multiple lines, they are joined, but
+since the pattern space usually contains a single line, the behavior
+of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e.,
+it does not join short lines to form longer ones).
+
+@var{n} specifies the desired line-wrap length; if omitted,
+the default as specified on the command line is used.
+@end ignore
+
+@item Q [@var{exit-code}]
+This command only accepts a single address.
+
+@findex Q (silent Quit) command
+@cindex @value{SSEDEXT}, quitting silently
+@cindex @value{SSEDEXT}, returning an exit code
+@cindex Quitting
+This command is the same as @code{q}, but will not print the
+contents of pattern space. Like @code{q}, it provides the
+ability to return an exit code to the caller.
+
+This command can be useful because the only alternative ways
+to accomplish this apparently trivial function are to use
+the @option{-n} option (which can unnecessarily complicate
+your script) or to resort to the following snippet, which
+wastes time by reading the whole file without any visible effect:
+
+@example
+:eat
+$d @i{Quit silently on the last line}
+N @i{Read another line, silently}
+g @i{Overwrite pattern space each time to save memory}
+b eat
+@end example
+
+@item R @var{filename}
+@findex R (read line) command
+@cindex Read text from a file
+@cindex @value{SSEDEXT}, reading a file a line at a time
+@cindex @value{SSEDEXT}, @code{R} command
+@cindex @value{SSEDEXT}, @file{/dev/stdin} file
+Queue a line of @var{filename} to be read and
+inserted into the output stream at the end of the current cycle,
+or when the next input line is read.
+Note that if @var{filename} cannot be read, or if its end is
+reached, no line is appended, without any error indication.
+
+As with the @code{r} command, the special value @file{/dev/stdin}
+is supported for the file name, which reads a line from the
+standard input.
+
+@item T @var{label}
+@findex T (test and branch if failed) command
+@cindex @value{SSEDEXT}, branch if @code{s///} failed
+@cindex Branch to a label, if @code{s///} failed
+@cindex Conditional branch
+Branch to @var{label} only if there have been no successful
+@code{s}ubstitutions since the last input line was read or
+conditional branch was taken. The @var{label} may be omitted,
+in which case the next cycle is started.
+
+@item v @var{version}
+@findex v (version) command
+@cindex @value{SSEDEXT}, checking for their presence
+@cindex Requiring @value{SSED}
+This command does nothing, but makes @command{sed} fail if
+@value{SSED} extensions are not supported, simply because other
+versions of @command{sed} do not implement it. In addition, you
+can specify the version of @command{sed} that your script
+requires, such as @code{4.0.5}. The default is @code{4.0}
+because that is the first version that implemented this command.
+
+This command enables all @value{SSEDEXT} even if
+@env{POSIXLY_CORRECT} is set in the environment.
+
+@item W @var{filename}
+@findex W (write first line) command
+@cindex Write first line to a file
+@cindex @value{SSEDEXT}, writing first line to a file
+Write to the given filename the portion of the pattern space up to
+the first newline. Everything said under the @code{w} command about
+file handling holds here too.
+@end table
+
+@node Escapes
+@section @acronym{GNU} Extensions for Escapes in Regular Expressions
+
+@cindex @acronym{GNU} extensions, special escapes
+Until this chapter, we have only encountered escapes of the form
+@samp{\^}, which tell @command{sed} not to interpret the circumflex
+as a special character, but rather to take it literally. For
+example, @samp{\*} matches a single asterisk rather than zero
+or more backslashes.
+
+@cindex @code{POSIXLY_CORRECT} behavior, escapes
+This chapter introduces another kind of escape@footnote{All
+the escapes introduced here are @acronym{GNU}
+extensions, with the exception of @code{\n}. In basic regular
+expression mode, setting @code{POSIXLY_CORRECT} disables them inside
+bracket expressions.}---that
+is, escapes that are applied to a character or sequence of characters
+that ordinarily are taken literally, and that @command{sed} replaces
+with a special character. This provides a way
+of encoding non-printable characters in patterns in a visible manner.
+There is no restriction on the appearance of non-printing characters
+in a @command{sed} script but when a script is being prepared in the
+shell or by text editing, it is usually easier to use one of
+the following escape sequences than the binary character it
+represents:
+
+The list of these escapes is:
+
+@table @code
+@item \a
+Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7).
+
+@item \f
+Produces or matches a form feed (@sc{ascii} 12).
+
+@item \n
+Produces or matches a newline (@sc{ascii} 10).
+
+@item \r
+Produces or matches a carriage return (@sc{ascii} 13).
+
+@item \t
+Produces or matches a horizontal tab (@sc{ascii} 9).
+
+@item \v
+Produces or matches a so-called ``vertical tab'' (@sc{ascii} 11).
+
+@item \c@var{x}
+Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is
+any character. The precise effect of @samp{\c@var{x}} is as follows:
+if @var{x} is a lower case letter, it is converted to upper case.
+Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes
+hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B.
+
+@item \d@var{xxx}
+Produces or matches a character whose decimal @sc{ascii} value is @var{xxx}.
+
+@item \o@var{xxx}
+@ifset PERL
+@item \@var{xxx}
+@end ifset
+Produces or matches a character whose octal @sc{ascii} value is @var{xxx}.
+@ifset PERL
+The syntax without the @code{o} is active in Perl mode, while the one
+with the @code{o} is active in the normal or extended @sc{posix} regular
+expression modes.
+@end ifset
+
+@item \x@var{xx}
+Produces or matches a character whose hexadecimal @sc{ascii} value is @var{xx}.
+@end table
+
+@samp{\b} (backspace) was omitted because of the conflict with
+the existing ``word boundary'' meaning.
+
+Other escapes match a particular character class and are valid only in
+regular expressions:
+
+@table @code
+@item \w
+Matches any ``word'' character. A ``word'' character is any
+letter or digit or the underscore character.
+
+@item \W
+Matches any ``non-word'' character.
+
+@item \b
+Matches a word boundary; that is it matches if the character
+to the left is a ``word'' character and the character to the
+right is a ``non-word'' character, or vice-versa.
+
+@item \B
+Matches everywhere but on a word boundary; that is it matches
+if the character to the left and the character to the right
+are either both ``word'' characters or both ``non-word''
+characters.
+
+@item \`
+Matches only at the start of pattern space. This is different
+from @code{^} in multi-line mode.
+
+@item \'
+Matches only at the end of pattern space. This is different
+from @code{$} in multi-line mode.
+
+@ifset PERL
+@item \G
+Match only at the start of pattern space or, when doing a global
+substitution using the @code{s///g} command and option, at
+the end-of-match position of the prior match. For example,
+@samp{s/\Ga/Z/g} will change an initial run of @code{a}s to
+a run of @code{Z}s.
+@end ifset
+@end table
+
+@node Examples
+@chapter Some Sample Scripts
+
+Here are some @command{sed} scripts to guide you in the art of mastering
+@command{sed}.
+
+@menu
+Some exotic examples:
+* Centering lines::
+* Increment a number::
+* Rename files to lower case::
+* Print bash environment::
+* Reverse chars of lines::
+
+Emulating standard utilities:
+* tac:: Reverse lines of files
+* cat -n:: Numbering lines
+* cat -b:: Numbering non-blank lines
+* wc -c:: Counting chars
+* wc -w:: Counting words
+* wc -l:: Counting lines
+* head:: Printing the first lines
+* tail:: Printing the last lines
+* uniq:: Make duplicate lines unique
+* uniq -d:: Print duplicated lines of input
+* uniq -u:: Remove all duplicated lines
+* cat -s:: Squeezing blank lines
+@end menu
+
+@node Centering lines
+@section Centering Lines
+
+This script centers all lines of a file on an 80-column width.
+To change that width, the number in @code{\@{@dots{}\@}} must be
+replaced, and the number of added spaces also must be changed.
+
+Note how the buffer commands are used to separate parts in
+the regular expressions to be matched---this is a common
+technique.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+# Put 80 spaces in the buffer
+1 @{
+ x
+ s/^$/          /
+ s/^.*$/&&&&&&&&/
+ x
+@}
+
+# del leading and trailing spaces
+y/@kbd{tab}/ /
+s/^ *//
+s/ *$//
+
+# add a newline and 80 spaces to end of line
+G
+
+# keep first 81 chars (80 + a newline)
+s/^\(.\@{81\@}\).*$/\1/
+
+# \2 matches half of the spaces, which are moved to the beginning
+s/^\(.*\)\n\(.*\)\2/\2\1/
+@end example
+@c end---------------------------------------------
+
+@node Increment a number
+@section Increment a Number
+
+This script is one of a few that demonstrate how to do arithmetic
+in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg
+Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator!
+It is distributed together with sed.} but must be done manually.
+
+To increment one number you just add 1 to the last digit, replacing
+it by the following digit. There is one exception: when the digit
+is a nine the previous digits must be also incremented until you
+don't have a nine.
+
+This solution by Bruno Haible is very clever and smart because
+it uses a single buffer; if you don't have this limitation, the
+algorithm used in @ref{cat -n, Numbering lines}, is faster.
+It works by replacing trailing nines with an underscore, then
+using multiple @code{s} commands to increment the last digit,
+and then again substituting underscores with zeros.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+/[^0-9]/ d
+
+# replace all trailing 9s by _ (any other character except digits could
+# be used)
+:d
+s/9\(_*\)$/_\1/
+td
+
+# incr last digit only. The first line adds a most-significant
+# digit of 1 if we have to add a digit.
+#
+# The @code{tn} commands are not necessary, but make the thing
+# faster
+
+s/^\(_*\)$/1\1/; tn
+s/8\(_*\)$/9\1/; tn
+s/7\(_*\)$/8\1/; tn
+s/6\(_*\)$/7\1/; tn
+s/5\(_*\)$/6\1/; tn
+s/4\(_*\)$/5\1/; tn
+s/3\(_*\)$/4\1/; tn
+s/2\(_*\)$/3\1/; tn
+s/1\(_*\)$/2\1/; tn
+s/0\(_*\)$/1\1/; tn
+
+:n
+y/_/0/
+@end example
+@c end---------------------------------------------
+
+@node Rename files to lower case
+@section Rename Files to Lower Case
+
+This is a pretty strange use of @command{sed}. We transform text, and
+transform it to be shell commands, then just feed them to shell.
+Don't worry, even worse hacks are done when using @command{sed}; I have
+seen a script converting the output of @command{date} into a @command{bc}
+program!
+
+The main body of this is the @command{sed} script, which remaps the name
+from lower to upper (or vice-versa) and even checks
+whether the remapped name is the same as the original name.
+Note how the script is parameterized using shell
+variables and proper quoting.
+
+@c start-------------------------------------------
+@example
+#! /bin/sh
+# rename files to lower/upper case...
+#
+# usage:
+# move-to-lower *
+# move-to-upper *
+# or
+# move-to-lower -R .
+# move-to-upper -R .
+#
+
+help()
+@{
+ cat << eof
+Usage: $0 [-n] [-r] [-h] files...
+
+-n do nothing, only see what would be done
+-R recursive (use find)
+-h this message
+files files to remap to lower case
+
+Examples:
+ $0 -n * (see if everything is ok, then...)
+ $0 *
+
+ $0 -R .
+
+eof
+@}
+
+apply_cmd='sh'
+finder='echo "$@@" | tr " " "\n"'
+files_only=
+
+while :
+do
+ case "$1" in
+ -n) apply_cmd='cat' ;;
+ -R) finder='find "$@@" -type f';;
+ -h) help ; exit 1 ;;
+ *) break ;;
+ esac
+ shift
+done
+
+if [ -z "$1" ]; then
+ echo Usage: $0 [-h] [-n] [-r] files...
+ exit 1
+fi
+
+LOWER='abcdefghijklmnopqrstuvwxyz'
+UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+case `basename $0` in
+ *upper*) TO=$UPPER; FROM=$LOWER ;;
+ *) FROM=$UPPER; TO=$LOWER ;;
+esac
+
+eval $finder | sed -n '
+
+# remove all trailing slashes
+s/\/*$//
+
+# add ./ if there is no path, only a filename
+/\//! s/^/.\//
+
+# save path+filename
+h
+
+# remove path
+s/.*\///
+
+# do conversion only on filename
+y/'$FROM'/'$TO'/
+
+# now line contains original path+file, while
+# hold space contains the new filename
+x
+
+# add converted file name to line, which now contains
+# path/file-name\nconverted-file-name
+G
+
+# check if converted file name is equal to original file name,
+# if it is, do not print anything
+/^.*\/\(.*\)\n\1/b
+
+# now, transform path/fromfile\n, into
+# mv path/fromfile path/tofile and print it
+s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p
+
+' | $apply_cmd
+@end example
+@c end---------------------------------------------
+
+@node Print bash environment
+@section Print @command{bash} Environment
+
+This script strips the definition of the shell functions
+from the output of the @command{set} Bourne-shell command.
+
+@c start-------------------------------------------
+@example
+#!/bin/sh
+
+set | sed -n '
+:x
+
+@ifinfo
+# if no occurrence of "=()" print and load next line
+@end ifinfo
+@ifnotinfo
+# if no occurrence of @samp{=()} print and load next line
+@end ifnotinfo
+/=()/! @{ p; b; @}
+/ () $/! @{ p; b; @}
+
+# possible start of functions section
+# save the line in case this is a var like FOO="() "
+h
+
+# if the next line has a brace, we quit because
+# nothing comes after functions
+n
+/^@{/ q
+
+# print the old line
+x; p
+
+# work on the new line now
+x; bx
+'
+@end example
+@c end---------------------------------------------
+
+@node Reverse chars of lines
+@section Reverse Characters of Lines
+
+This script can be used to reverse the position of characters
+in lines. The technique moves two characters at a time, hence
+it is faster than more intuitive implementations.
+
+Note the @code{tx} command before the definition of the label.
+This is often needed to reset the flag that is tested by
+the @code{t} command.
+
+Imaginative readers will find uses for this script. An example
+is reversing the output of @command{banner}.@footnote{This requires
+another script to pad the output of banner; for example
+
+@example
+#! /bin/sh
+
+banner -w $1 $2 $3 $4 |
+ sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' |
+ ~/sedscripts/reverseline.sed
+@end example
+}
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+/../! b
+
+# Reverse a line. Begin embedding the line between two newlines
+s/^.*$/\
+&\
+/
+
+# Move first character at the end. The regexp matches until
+# there are zero or one characters between the markers
+tx
+:x
+s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/
+tx
+
+# Remove the newline markers
+s/\n//g
+@end example
+@c end---------------------------------------------
+
+@node tac
+@section Reverse Lines of Files
+
+This one begins a series of totally useless (yet interesting)
+scripts emulating various Unix commands. This, in particular,
+is a @command{tac} workalike.
+
+Note that on implementations other than @acronym{GNU} @command{sed}
+@ifset PERL
+and @value{SSED}
+@end ifset
+this script might easily overflow internal buffers.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# reverse all lines of input, i.e. the first line becomes the last, ...
+
+# from the second line, the buffer (which contains all previous lines)
+# is *appended* to current line, so, the order will be reversed
+1! G
+
+# on the last line we're done -- print everything
+$ p
+
+# store everything on the buffer again
+h
+@end example
+@c end---------------------------------------------
+
+@node cat -n
+@section Numbering Lines
+
+This script replaces @samp{cat -n}; in fact it formats its output
+exactly like @acronym{GNU} @command{cat} does.
+
+Of course this is completely useless, for two reasons: first,
+because somebody else did it in C, second, because the following
+Bourne-shell script could be used for the same purpose and would
+be much faster:
+
+@c start-------------------------------------------
+@example
+#! /bin/sh
+sed -e "=" $@@ | sed -e '
+ s/^/ /
+ N
+ s/^ *\(......\)\n/\1 /
+'
+@end example
+@c end---------------------------------------------
+
+It uses @command{sed} to print the line number, then groups lines two
+by two using @code{N}. Of course, this script does not teach as much as
+the one presented below.
+
+The algorithm used for incrementing uses both buffers, so the line
+is printed as soon as possible and then discarded. The number
+is split so that changing digits go in a buffer and unchanged ones go
+in the other; the changed digits are modified in a single step
+(using a @code{y} command). The line number for the next line
+is then composed and stored in the hold space, to be used in the
+next iteration.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# Prime the pump on the first line
+x
+/^$/ s/^.*$/1/
+
+# Add the correct line number before the pattern
+G
+h
+
+# Format it and print it
+s/^/ /
+s/^ *\(......\)\n/\1 /p
+
+# Get the line number from hold space; add a zero
+# if we're going to add a digit on the next line
+g
+s/\n.*$//
+/^9*$/ s/^/0/
+
+# separate changing/unchanged digits with an x
+s/.9*$/x&/
+
+# keep changing digits in hold space
+h
+s/^.*x//
+y/0123456789/1234567890/
+x
+
+# keep unchanged digits in pattern space
+s/x.*$//
+
+# compose the new number, remove the newline implicitly added by G
+G
+s/\n//
+h
+@end example
+@c end---------------------------------------------
+
+@node cat -b
+@section Numbering Non-blank Lines
+
+Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only
+have to select which lines are to be numbered and which are not.
+
+The part that is common to this script and the previous one is
+not commented to show how important it is to comment @command{sed}
+scripts properly...
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+/^$/ @{
+ p
+ b
+@}
+
+# Same as cat -n from now
+x
+/^$/ s/^.*$/1/
+G
+h
+s/^/ /
+s/^ *\(......\)\n/\1 /p
+x
+s/\n.*$//
+/^9*$/ s/^/0/
+s/.9*$/x&/
+h
+s/^.*x//
+y/0123456789/1234567890/
+x
+s/x.*$//
+G
+s/\n//
+h
+@end example
+@c end---------------------------------------------
+
+@node wc -c
+@section Counting Characters
+
+This script shows another way to do arithmetic with @command{sed}.
+In this case we have to add possibly large numbers, so implementing
+this by successive increments would not be feasible (and possibly
+even more complicated to contrive than this script).
+
+The approach is to map numbers to letters, kind of an abacus
+implemented with @command{sed}. @samp{a}s are units, @samp{b}s are
+tens and so on: we simply add the number of characters
+on the current line as units, and then propagate the carry
+to tens, hundreds, and so on.
+
+As usual, running totals are kept in hold space.
+
+On the last line, we convert the abacus form back to decimal.
+For the sake of variety, this is done with a loop rather than
+with some 80 @code{s} commands@footnote{Some implementations
+have a limit of 199 commands per script}: first we
+convert units, removing @samp{a}s from the number; then we
+rotate letters so that tens become @samp{a}s, and so on
+until no more letters remain.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# Add n+1 a's to hold space (+1 is for the newline)
+s/./a/g
+H
+x
+s/\n/a/
+
+# Do the carry. The t's and b's are not necessary,
+# but they do speed up the thing
+t a
+: a; s/aaaaaaaaaa/b/g; t b; b done
+: b; s/bbbbbbbbbb/c/g; t c; b done
+: c; s/cccccccccc/d/g; t d; b done
+: d; s/dddddddddd/e/g; t e; b done
+: e; s/eeeeeeeeee/f/g; t f; b done
+: f; s/ffffffffff/g/g; t g; b done
+: g; s/gggggggggg/h/g; t h; b done
+: h; s/hhhhhhhhhh//g
+
+: done
+$! @{
+ h
+ b
+@}
+
+# On the last line, convert back to decimal
+
+: loop
+/a/! s/[b-h]*/&0/
+s/aaaaaaaaa/9/
+s/aaaaaaaa/8/
+s/aaaaaaa/7/
+s/aaaaaa/6/
+s/aaaaa/5/
+s/aaaa/4/
+s/aaa/3/
+s/aa/2/
+s/a/1/
+
+: next
+y/bcdefgh/abcdefg/
+/[a-h]/ b loop
+p
+@end example
+@c end---------------------------------------------
+
+@node wc -w
+@section Counting Words
+
+This script is almost the same as the previous one, once each
+of the words on the line is converted to a single @samp{a}
+(in the previous script each letter was changed to an @samp{a}).
+
+It is interesting that real @command{wc} programs have optimized
+loops for @samp{wc -c}, so they are much slower at counting
+words than at counting characters. This script's bottleneck,
+instead, is arithmetic, and hence the word-counting one
+is faster (it has to manage smaller numbers).
+
+Again, the common parts are not commented to show the importance
+of commenting @command{sed} scripts.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# Convert words to a's
+s/[ @kbd{tab}][ @kbd{tab}]*/ /g
+s/^/ /
+s/ [^ ][^ ]*/a /g
+s/ //g
+
+# Append them to hold space
+H
+x
+s/\n//
+
+# From here on it is the same as in wc -c.
+/aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g
+/bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g
+/cccccccccc/! bx; s/cccccccccc/d/g
+/dddddddddd/! bx; s/dddddddddd/e/g
+/eeeeeeeeee/! bx; s/eeeeeeeeee/f/g
+/ffffffffff/! bx; s/ffffffffff/g/g
+/gggggggggg/! bx; s/gggggggggg/h/g
+s/hhhhhhhhhh//g
+:x
+$! @{ h; b; @}
+:y
+/a/! s/[b-h]*/&0/
+s/aaaaaaaaa/9/
+s/aaaaaaaa/8/
+s/aaaaaaa/7/
+s/aaaaaa/6/
+s/aaaaa/5/
+s/aaaa/4/
+s/aaa/3/
+s/aa/2/
+s/a/1/
+y/bcdefgh/abcdefg/
+/[a-h]/ by
+p
+@end example
+@c end---------------------------------------------
+
+@node wc -l
+@section Counting Lines
+
+No strange things are done now, because @command{sed} gives us
+@samp{wc -l} functionality for free!!! Look:
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+$=
+@end example
+@c end---------------------------------------------
+
+@node head
+@section Printing the First Lines
+
+This script is probably the simplest useful @command{sed} script.
+It displays the first 10 lines of input; the number of displayed
+lines is right before the @code{q} command.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+10q
+@end example
+@c end---------------------------------------------
+
+@node tail
+@section Printing the Last Lines
+
+Printing the last @var{n} lines rather than the first is more complex
+but indeed possible. @var{n} is encoded in the second line, before
+the bang character.
+
+This script is similar to the @command{tac} script in that it keeps the
+final output in the hold space and prints it at the end:
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+1! @{; H; g; @}
+1,10 !s/[^\n]*\n//
+$p
+h
+@end example
+@c end---------------------------------------------
+
+Mainly, the script keeps a window of 10 lines and slides it
+by adding a line and deleting the oldest (the substitution command
+on the second line works like a @code{D} command but does not
+restart the loop).
+
+The ``sliding window'' technique is a very powerful way to write
+efficient and complex @command{sed} scripts, because commands like
+@code{P} would require a lot of work if implemented manually.
+
+To introduce the technique, which is fully demonstrated in the
+rest of this chapter and is based on the @code{N}, @code{P}
+and @code{D} commands, here is an implementation of @command{tail}
+using a simple ``sliding window.''
+
+This looks complicated but in fact the working is the same as
+the last script: after we have kicked in the appropriate number
+of lines, however, we stop using the hold space to keep inter-line
+state, and instead use @code{N} and @code{D} to slide pattern
+space by one line:
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+1h
+2,10 @{; H; g; @}
+$q
+1,9d
+N
+D
+@end example
+@c end---------------------------------------------
+
+
+@node uniq
+@section Make Duplicate Lines Unique
+
+This is an example of the art of using the @code{N}, @code{P}
+and @code{D} commands, probably the most difficult to master.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+h
+
+:b
+# On the last line, print and exit
+$b
+N
+/^\(.*\)\n\1$/ @{
+ # The two lines are identical. Undo the effect of
+ # the N command.
+ g
+ bb
+@}
+
+# If the @code{N} command had added the last line, print and exit
+$b
+
+# The lines are different; print the first and go
+# back working on the second.
+P
+D
+@end example
+@c end---------------------------------------------
+
+As you can see, we maintain a 2-line window using @code{P} and @code{D}.
+This technique is often used in advanced @command{sed} scripts.
+
+@node uniq -d
+@section Print Duplicated Lines of Input
+
+This script prints only duplicated lines, like @samp{uniq -d}.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+$b
+N
+/^\(.*\)\n\1$/ @{
+ # Print the first of the duplicated lines
+ s/.*\n//
+ p
+
+ # Loop until we get a different line
+ :b
+ $b
+ N
+ /^\(.*\)\n\1$/ @{
+ s/.*\n//
+ bb
+ @}
+@}
+
+# The last line cannot be followed by duplicates
+$b
+
+# Found a different one. Leave it alone in the pattern space
+# and go back to the top, hunting its duplicates
+D
+@end example
+@c end---------------------------------------------
+
+@node uniq -u
+@section Remove All Duplicated Lines
+
+This script prints only unique lines, like @samp{uniq -u}.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+# Search for a duplicate line --- until that, print what you find.
+$b
+N
+/^\(.*\)\n\1$/ ! @{
+ P
+ D
+@}
+
+:c
+# Got two equal lines in pattern space. At the
+# end of the file we simply exit
+$d
+
+# Else, we keep reading lines with @code{N} until we
+# find a different one
+s/.*\n//
+N
+/^\(.*\)\n\1$/ @{
+ bc
+@}
+
+# Remove the last instance of the duplicate line
+# and go back to the top
+D
+@end example
+@c end---------------------------------------------
+
+@node cat -s
+@section Squeezing Blank Lines
+
+As a final example, here are three scripts, of increasing complexity
+and speed, that implement the same function as @samp{cat -s}, that is
+squeezing blank lines.
+
+The first leaves a blank line at the beginning and end if there are
+some already.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+# on empty lines, join with next
+# Note there is a star in the regexp
+:x
+/^\n*$/ @{
+N
+bx
+@}
+
+# now, squeeze all '\n', this can be also done by:
+# s/^\(\n\)*/\1/
+s/\n*/\
+/
+@end example
+@c end---------------------------------------------
+
+This one is a bit more complex and removes all empty lines
+at the beginning. It does leave a single blank line at the end
+if one was there.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+# delete all leading empty lines
+1,/^./@{
+/./!d
+@}
+
+# on an empty line we remove it and all the following
+# empty lines, but one
+:x
+/./!@{
+N
+s/^\n$//
+tx
+@}
+@end example
+@c end---------------------------------------------
+
+This removes leading and trailing blank lines. It is also the
+fastest. Note that loops are completely done with @code{n} and
+@code{b}, without exploiting the fact that @command{sed} cycles back
+to the top of the script automatically at the end of a line.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# delete all (leading) blanks
+/./!d
+
+# get here: so there is a non empty
+:x
+# print it
+p
+# get next
+n
+# got chars? print it again, etc...
+/./bx
+
+# no, don't have chars: got an empty line
+:z
+# get next, if last line we finish here so no trailing
+# empty lines are written
+n
+# also empty? then ignore it, and get next... this will
+# remove ALL empty lines
+/./!bz
+
+# all empty lines were deleted/ignored, but we have a non empty. As
+# what we want to do is to squeeze, insert a blank line artificially
+i\
+
+bx
+@end example
+@c end---------------------------------------------
+
+@node Limitations
+@chapter @value{SSED}'s Limitations and Non-limitations
+
+@cindex @acronym{GNU} extensions, unlimited line length
+@cindex Portability, line length limitations
+For those who want to write portable @command{sed} scripts,
+be aware that some implementations have been known to
+limit line lengths (for the pattern and hold spaces)
+to be no more than 4000 bytes.
+The @sc{posix} standard specifies that conforming @command{sed}
+implementations shall support at least 8192 byte line lengths.
+@value{SSED} has no built-in limit on line length;
+as long as it can @code{malloc()} more (virtual) memory,
+you can feed or construct lines as long as you like.
+
+However, recursion is used to handle subpatterns and indefinite
+repetition. This means that the available stack space may limit
+the size of the buffer that can be processed by certain patterns.
+
+@ifset PERL
+There are some size limitations in the regular expression
+matcher but it is hoped that they will never in practice
+be relevant. The maximum length of a compiled pattern
+is 65539 (sic) bytes. All values in repeating quantifiers
+must be less than 65536. The maximum nesting depth of
+all parenthesized subpatterns, including capturing and
+non-capturing subpatterns@footnote{The
+distinction is meaningful when referring to Perl-style
+regular expressions.}, assertions, and other types of
+subpattern, is 200.
+
+Also, @value{SSED} recognizes the @sc{posix} syntax
+@code{[.@var{ch}.]} and @code{[=@var{ch}=]}
+where @var{ch} is a ``collating element'', but these
+are not supported, and an error is given if they are
+encountered.
+
+Here are a few distinctions between the real Perl-style
+regular expressions and those that @option{-R} recognizes.
+
+@enumerate
+@item
+Lookahead assertions do not allow repeat quantifiers after them.
+Perl permits them, but they do not mean what you
+might think. For example, @samp{(?!a)@{3@}} does not assert that the
+next three characters are not @samp{a}. It just asserts three times that the
+next character is not @samp{a} --- a waste of time and nothing else.
+
+@item
+Capturing subpatterns that occur inside negative lookahead
+assertions are counted, but their entries are counted
+as empty in the second half of an @code{s} command.
+Perl sets its numerical variables from any such patterns
+that are matched before the assertion fails to match
+something (thereby succeeding), but only if the negative
+lookahead assertion contains just one branch.
+
+@item
+The following Perl escape sequences are not supported:
+@samp{\l}, @samp{\u}, @samp{\L}, @samp{\U}, @samp{\E},
+@samp{\Q}. In fact these are implemented by Perl's general
+string-handling and are not part of its pattern matching engine.
+
+@item
+The Perl @samp{\G} assertion is not supported as it is not
+relevant to single pattern matches.
+
+@item
+Fairly obviously, @value{SSED} does not support the @samp{(?@{code@})}
+and @samp{(?p@{code@})} constructions. However, there is some experimental
+support for recursive patterns using the non-Perl item @samp{(?R)}.
+
+@item
+There are at the time of writing some oddities in Perl
+5.005_02 concerned with the settings of captured strings
+when part of a pattern is repeated. For example, matching
+@samp{aba} against the pattern @samp{/^(a(b)?)+$/} sets
+@samp{$2}@footnote{@samp{$2} would be @samp{\2} in @value{SSED}.}
+to the value @samp{b}, but matching @samp{aabbaa}
+against @samp{/^(aa(bb)?)+$/} leaves @samp{$2}
+unset. However, if the pattern is changed to
+@samp{/^(aa(b(b))?)+$/} then @samp{$2} (and @samp{$3}) are set.
+In Perl 5.004 @samp{$2} is set in both cases, and that is also
+true of @value{SSED}.
+
+@item
+Another as yet unresolved discrepancy is that in Perl
+5.005_02 the pattern @samp{/^(a)?(?(1)a|b)+$/} matches
+the string @samp{a}, whereas in @value{SSED} it does not.
+However, in both Perl and @value{SSED} @samp{/^(a)?a/} matched
+against @samp{a} leaves @samp{$1} unset.
+@end enumerate
+@end ifset
+
+@node Other Resources
+@chapter Other Resources for Learning About @command{sed}
+
+@cindex Additional reading about @command{sed}
+In addition to several books that have been written about @command{sed}
+(either specifically or as chapters in books which discuss
+shell programming), one can find out more about @command{sed}
+(including suggestions of a few books) from the FAQ
+for the @code{sed-users} mailing list, available from any of:
+@display
+ @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html}
+ @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html}
+@end display
+
+Also of interest are
+@uref{http://www.student.northpark.edu/pemente/sed/index.htm}
+and @uref{http://sed.sf.net/grabbag},
+which include @command{sed} tutorials and other @command{sed}-related goodies.
+
+The @code{sed-users} mailing list itself is maintained by Sven Guckes.
+To subscribe, visit @uref{http://groups.yahoo.com} and search
+for the @code{sed-users} mailing list.
+
+@node Reporting Bugs
+@chapter Reporting Bugs
+
+@cindex Bugs, reporting
+Email bug reports to @email{bonzini@@gnu.org}.
+Be sure to include the word ``sed'' somewhere in the @code{Subject:} field.
+Also, please include the output of @samp{sed --version} in the body
+of your report if at all possible.
+
+Please do not send a bug report like this:
+
+@example
+@i{while building frobme-1.3.4}
+$ configure
+@error{} sed: file sedscr line 1: Unknown option to 's'
+@end example
+
+If @value{SSED} doesn't configure your favorite package, take a
+few extra minutes to identify the specific problem and make a stand-alone
+test case. Unlike other programs such as C compilers, making such test
+cases for @command{sed} is quite simple.
+
+A stand-alone test case includes all the data necessary to perform the
+test, and the specific invocation of @command{sed} that causes the problem.
+The smaller a stand-alone test case is, the better. A test case should
+not involve something as far removed from @command{sed} as ``try to configure
+frobme-1.3.4''. Yes, that is in principle enough information to look
+for the bug, but that is not a very practical prospect.
+
+Here are a few commonly reported bugs that are not bugs.
+
+@table @asis
+@item @code{N} command on the last line
+@cindex Portability, @code{N} command on the last line
+@cindex Non-bugs, @code{N} command on the last line
+
+Most versions of @command{sed} exit without printing anything when
+the @command{N} command is issued on the last line of a file.
+@value{SSED} prints pattern space before exiting unless of course
+the @option{-n} command-line switch has been specified. This choice is
+by design.
+
+For example, the behavior of
+@example
+sed N foo bar
+@end example
+@noindent
+would depend on whether foo has an even or an odd number of
+lines@footnote{which is the actual ``bug'' that prompted the
+change in behavior}. Or, when writing a script to read the
+next few lines following a pattern match, traditional
+implementations of @command{sed} would force you to write
+something like
+@example
+/foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @}
+@end example
+@noindent
+instead of just
+@example
+/foo/@{ N;N;N;N;N;N;N;N;N; @}
+@end example
+
+@cindex @code{POSIXLY_CORRECT} behavior, @code{N} command
+In any case, the simplest workaround is to use @code{$d;N} in
+scripts that rely on the traditional behavior, or to set
+the @code{POSIXLY_CORRECT} variable to a non-empty value.
+
+@item Regex syntax clashes (problems with backslashes)
+@cindex @acronym{GNU} extensions, to basic regular expressions
+@cindex Non-bugs, regex syntax clashes
+@command{sed} uses the @sc{posix} basic regular expression syntax. According to
+the standard, the meaning of some escape sequences is undefined in
+this syntax; notable in the case of @command{sed} are @code{\|},
+@code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<},
+@code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}.
+
+As in all @acronym{GNU} programs that use @sc{posix} basic regular
+expressions, @command{sed} interprets these escape sequences as special
+characters. So, @code{x\+} matches one or more occurrences of @samp{x}.
+@code{abc\|def} matches either @samp{abc} or @samp{def}.
+
+This syntax may cause problems when running scripts written for other
+@command{sed}s. Some @command{sed} programs have been written with the
+assumption that @code{\|} and @code{\+} match the literal characters
+@code{|} and @code{+}. Such scripts must be modified by removing the
+spurious backslashes if they are to be used with modern implementations
+of @command{sed}, like
+@ifset PERL
+@value{SSED} or
+@end ifset
+@acronym{GNU} @command{sed}.
+
+On the other hand, some scripts use @samp{s|abc\|def||g} to remove occurrences
+of @emph{either} @code{abc} or @code{def}. While this worked until
+@command{sed} 4.0.x, newer versions interpret this as removing the
+string @code{abc|def}. This is again undefined behavior according to
+@acronym{POSIX}, and this interpretation is arguably more robust: older
+@command{sed}s, for example, required that the regex matcher parsed
+@code{\/} as @code{/} in the common case of escaping a slash, which is
+again undefined behavior; the new behavior avoids this, and this is good
+because the regex matcher is only partially under our control.
+
+@cindex @acronym{GNU} extensions, special escapes
+In addition, this version of @command{sed} supports several escape characters
+(some of which are multi-character) to insert non-printable characters
+in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r},
+@code{\t}, @code{\v}, @code{\x}). These can cause similar problems
+with scripts written for other @command{sed}s.
+
+@item @option{-i} clobbers read-only files
+@cindex In-place editing
+@cindex @value{SSEDEXT}, in-place editing
+@cindex Non-bugs, in-place editing
+
+In short, @samp{sed -i} will let you delete the contents of
+a read-only file, and in general the @option{-i} option
+(@pxref{Invoking sed, , Invocation}) lets you clobber
+protected files. This is not a bug, but rather a consequence
+of how the Unix filesystem works.
+
+The permissions on a file say what can happen to the data
+in that file, while the permissions on a directory say what can
+happen to the list of files in that directory. @samp{sed -i}
+will not ever open for writing a file that is already on disk.
+Rather, it will work on a temporary file that is finally renamed
+to the original name: if you rename or delete files, you're actually
+modifying the contents of the directory, so the operation depends on
+the permissions of the directory, not of the file. For this same
+reason, @command{sed} does not let you use @option{-i} on a writeable file
+in a read-only directory (but unbelievably nobody reports that as a
+bug@dots{}).
+
+@item @code{0a} does not work (gives an error)
+There is no line 0. 0 is a special address that is only used to treat
+addresses like @samp{0,/@var{RE}/} as active when the script starts: if
+you write @samp{1,/abc/d} and the first line includes the word @samp{abc},
+then that match would be ignored because address ranges must span at least
+two lines (barring the end of the file); but what you probably wanted is
+to delete every line up to the first one including @samp{abc}, and this
+is obtained with @samp{0,/abc/d}.
+@end table
+
+@node Extended regexps
+@appendix Extended regular expressions
+@cindex Extended regular expressions, syntax
+
+The only difference between basic and extended regular expressions is in
+the behavior of a few characters: @samp{?}, @samp{+}, parentheses,
+braces (@samp{@{@}}), and @samp{|}. While basic regular expressions require
+these to be escaped if you want them to behave as special characters,
+when using extended regular expressions you must escape them if
+you want them @emph{to match a literal character}.
+
+@noindent
+Examples:
+@table @code
+@item abc?
+becomes @samp{abc\?} when using extended regular expressions. It matches
+the literal string @samp{abc?}.
+
+@item c\+
+becomes @samp{c+} when using extended regular expressions. It matches
+one or more @samp{c}s.
+
+@item a\@{3,\@}
+becomes @samp{a@{3,@}} when using extended regular expressions. It matches
+three or more @samp{a}s.
+
+@item \(abc\)\@{2,3\@}
+becomes @samp{(abc)@{2,3@}} when using extended regular expressions. It
+matches either @samp{abcabc} or @samp{abcabcabc}.
+
+@item \(abc*\)\1
+becomes @samp{(abc*)\1} when using extended regular expressions.
+Backreferences must still be escaped when using extended regular
+expressions.
+@end table
+
+@ifset PERL
+@node Perl regexps
+@appendix Perl-style regular expressions
+@cindex Perl-style regular expressions, syntax
+
+@emph{This part is taken from the @file{pcre.txt} file distributed together
+with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.}
+
+Perl introduced several extensions to regular expressions, some
+of them incompatible with the syntax of regular expressions
+accepted by Emacs and other @acronym{GNU} tools (whose matcher was
+based on the Emacs matcher). @value{SSED} implements
+both kinds of extensions.
+
+@iftex
+Summarizing, we have:
+
+@itemize @bullet
+@item
+A backslash can introduce several special sequences
+
+@item
+The circumflex, dollar sign, and period characters behave specially
+with regard to new lines
+
+@item
+Strange uses of square brackets are parsed differently
+
+@item
+You can toggle modifiers in the middle of a regular expression
+
+@item
+You can specify that a subpattern does not count when numbering backreferences
+
+@item
+@cindex Greedy regular expression matching
+You can specify greedy or non-greedy matching
+
+@item
+You can have more than ten back references
+
+@item
+You can do complex look aheads and look behinds (in the spirit of
+@code{\b}, but with subpatterns).
+
+@item
+You can often improve performance by preventing @command{sed} from
+wasting time on backtracking
+
+@item
+You can have if/then/else branches
+
+@item
+You can do recursive matches, for example to look for unbalanced parentheses
+
+@item
+You can have comments and non-significant whitespace, because things can
+get complex...
+@end itemize
+
+Most of these extensions are introduced by the special @code{(?}
+sequence, which gives special meanings to parenthesized groups.
+@end iftex
+@menu
+Other extensions can be roughly subdivided into two categories.
+On one hand Perl introduces several more escaped sequences
+(that is, sequences introduced by a backslash). On the other
+hand, it specifies that if a question mark follows an open
+parenthesis it should give a special meaning to the parenthesized
+group.
+
+* Backslash:: Introduces special sequences
+* Circumflex/dollar sign/period:: Behave specially with regard to new lines
+* Square brackets:: Are a bit different in strange cases
+* Options setting:: Toggle modifiers in the middle of a regexp
+* Non-capturing subpatterns:: Are not counted when backreferencing
+* Repetition:: Allows for non-greedy matching
+* Backreferences:: Allows for more than 10 back references
+* Assertions:: Allows for complex look ahead matches
+* Non-backtracking subpatterns:: Often gives more performance
+* Conditional subpatterns:: Allows if/then/else branches
+* Recursive patterns:: For example to match parentheses
+* Comments:: Because things can get complex...
+@end menu
+
+@node Backslash
+@appendixsec Backslash
+@cindex Perl-style regular expressions, escaped sequences
+
+There are a few differences in the handling of backslashed
+sequences in Perl mode.
+
+First of all, there are no @code{\o} and @code{\d} sequences.
+@sc{ascii} values for characters can be specified in octal
+with a @code{\@var{xxx}} sequence, where @var{xxx} is a
+sequence of up to three octal digits. If the first digit
+is a zero, the treatment of the sequence is straightforward;
+just note that if the character that follows the escaped digit
+is itself an octal digit, you have to supply three octal digits
+for @var{xxx}. For example @code{\07} is a @sc{bel} character
+rather than a @sc{nul} and a literal @code{7} (this sequence is
+instead represented by @code{\0007}).
+
+@cindex Perl-style regular expressions, backreferences
+The handling of a backslash followed by a digit other than 0
+is complicated. Outside a character class, @command{sed} reads it
+and any following digits as a decimal number. If the number
+is less than 10, or if there have been at least that many
+previous capturing left parentheses in the expression, the
+entire sequence is taken as a back reference. A description
+of how this works is given later, following the discussion
+of parenthesized subpatterns.
+
+Inside a character class, or if the decimal number is
+greater than 9 and there have not been that many capturing
+subpatterns, @command{sed} re-reads up to three octal digits following
+the backslash, and generates a single byte from the
+least significant 8 bits of the value. Any subsequent digits
+stand for themselves. For example:
+
+@example
+ \040 @i{is another way of writing a space}
+ \40 @i{is the same, provided there are fewer than 40}
+ @i{previous capturing subpatterns}
+ \7 @i{is always a back reference}
+ \011 @i{is always a tab}
+ \11 @i{might be a back reference, or another way of}
+ @i{writing a tab}
+ \0113 @i{is a tab followed by the character @samp{3}}
+ \113 @i{is the character with octal code 113 (since there}
+ @i{can be no more than 99 back references)}
+ \377 @i{is a byte consisting entirely of 1 bits (@sc{ascii} 255)}
+ \81 @i{is either a back reference, or a binary zero}
+ @i{followed by the two characters @samp{81}}
+@end example
+
+Note that octal values of 100 or greater must not be introduced
+by a leading zero, because no more than three octal
+digits are ever read.
+
+All the sequences that define a single byte value can be
+used both inside and outside character classes. In addition,
+inside a character class, the sequence @code{\b} is interpreted
+as the backspace character (hex 08). Outside a character
+class it has a different meaning (see below).
+
+In addition, there are two escapes specifying
+generic character classes (like @code{\w} and @code{\W} do):
+
+@cindex Perl-style regular expressions, character classes
+@table @samp
+@item \d
+Matches any decimal digit
+
+@item \D
+Matches any character that is not a decimal digit
+@end table
+
+In Perl mode, these character type sequences can appear both inside and
+outside character classes. Instead, in @sc{posix} mode these sequences
+(as well as @code{\w} and @code{\W}) are treated as two literal characters
+(a backslash and a letter) inside square brackets.
+
+Escaped sequences specifying assertions are also different in
+Perl mode. An assertion specifies a condition that has to be met
+at a particular point in a match, without consuming any
+characters from the subject string. The use of subpatterns
+for more complicated assertions is described below. The
+backslashed assertions are
+
+@cindex Perl-style regular expressions, assertions
+@table @samp
+@item \b
+Asserts that the point is at a word boundary.
+A word boundary is a position in the subject string where
+the current character and the previous character do not both
+match @code{\w} or @code{\W} (i.e. one matches @code{\w} and
+the other matches @code{\W}), or the start or end of the string
+if the first or last character matches @code{\w}, respectively.
+
+@item \B
+Asserts that the point is not at a word boundary.
+
+@item \A
+Asserts the matcher is at the start of pattern space (independent
+of multiline mode).
+
+@item \Z
+Asserts the matcher is at the end of pattern space,
+or at a newline before the end of pattern space (independent of
+multiline mode)
+
+@item \z
+Asserts the matcher is at the end of pattern space (independent
+of multiline mode)
+@end table
+
+These assertions may not appear in character classes (but
+note that @code{\b} has a different meaning, namely the
+backspace character, inside a character class).
+Note that Perl mode does not directly support assertions
+for the beginning and the end of word; the @acronym{GNU} extensions
+@code{\<} and @code{\>} achieve this purpose in @sc{posix} mode
+instead.
+
+The @code{\A}, @code{\Z}, and @code{\z} assertions differ
+from the traditional circumflex and dollar sign (described below)
+in that they only ever match at the very start and end of the
+subject string, whatever options are set; in particular @code{\A}
+and @code{\z} are the same as the @acronym{GNU} extensions
+@code{\`} and @code{\'} that are active in @sc{posix} mode.
+
+@node Circumflex/dollar sign/period
+@appendixsec Circumflex, dollar sign, period
+@cindex Perl-style regular expressions, newlines
+
+Outside a character class, in the default matching mode, the
+circumflex character is an assertion which is true only if
+the current matching point is at the start of the subject
+string. Inside a character class, the circumflex has an entirely
+different meaning (see below).
+
+The circumflex need not be the first character of the pattern if
+a number of alternatives are involved, but it should be the
+first thing in each alternative in which it appears if the
+pattern is ever to match that branch. If all possible alternatives
+start with a circumflex, that is, if the pattern is
+constrained to match only at the start of the subject, it is
+said to be an @dfn{anchored} pattern. (There are also other
+constructs that can cause a pattern to be anchored.)
+
+A dollar sign is an assertion which is true only if the
+current matching point is at the end of the subject string,
+or immediately before a newline character that is the last
+character in the string (by default). A dollar sign need not be the
+last character of the pattern if a number of alternatives
+are involved, but it should be the last item in any branch
+in which it appears. A dollar sign has no special meaning in a
+character class.
+
+@cindex Perl-style regular expressions, multiline
+The meanings of the circumflex and dollar sign characters are
+changed if the @code{M} modifier option is used. When this is
+the case, they match immediately after and immediately
+before an internal @code{\n} character, respectively, in addition
+to matching at the start and end of the subject string. For
+example, the pattern @code{/^abc$/} matches the subject string
+@samp{def\nabc} in multiline mode, but not otherwise. Consequently,
+patterns that are anchored in single line mode
+because all branches start with @code{^} are not anchored in
+multiline mode.
+
+@cindex Perl-style regular expressions, multiline
+Note that the sequences @code{\A}, @code{\Z}, and @code{\z}
+can be used to match the start and end of the subject in both
+modes, and if all branches of a pattern start with @code{\A}
+it is always anchored, whether the @code{M} modifier is set or not.
+
+@cindex Perl-style regular expressions, single line
+Outside a character class, a dot in the pattern matches any
+one character in the subject, including a non-printing character,
+but not (by default) newline. If the @code{S} modifier is used,
+dots match newlines as well. Actually, the handling of
+dot is entirely independent of the handling of circumflex
+and dollar sign, the only relationship being that they both
+involve newline characters. Dot has no special meaning in a
+character class.
+
+@node Square brackets
+@appendixsec Square brackets
+@cindex Perl-style regular expressions, character classes
+
+An opening square bracket introduces a character class, terminated
+by a closing square bracket. A closing square bracket on its own
+is not special. If a closing square bracket is required as a
+member of the class, it should be the first data character in
+the class (after an initial circumflex, if present) or escaped with a backslash.
+
+A character class matches a single character in the subject;
+the character must be in the set of characters defined by
+the class, unless the first character in the class is a circumflex,
+in which case the subject character must not be in
+the set defined by the class. If a circumflex is actually
+required as a member of the class, ensure it is not the
+first character, or escape it with a backslash.
+
+For example, the character class [aeiou] matches any lower
+case vowel, while [^aeiou] matches any character that is not
+a lower case vowel. Note that a circumflex is just a
+convenient notation for specifying the characters which are in
+the class by enumerating those that are not. It is not an
+assertion: it still consumes a character from the subject
+string, and fails if the current pointer is at the end of
+the string.
+
+@cindex Perl-style regular expressions, case-insensitive
+When caseless matching is set, any letters in a class
+represent both their upper case and lower case versions, so
+for example, a caseless @code{[aeiou]} matches uppercase
+and lowercase @samp{A}s, and a caseless @code{[^aeiou]}
+does not match @samp{A}, whereas a case-sensitive version would.
+
+@cindex Perl-style regular expressions, single line
+@cindex Perl-style regular expressions, multiline
+The newline character is never treated in any special way in
+character classes, whatever the setting of the @code{S} and
+@code{M} options (modifiers) is. A class such as @code{[^a]} will
+always match a newline.
+
+The minus (hyphen) character can be used to specify a range
+of characters in a character class. For example, @code{[d-m]}
+matches any letter between d and m, inclusive. If a minus
+character is required in a class, it must be escaped with a
+backslash or appear in a position where it cannot be interpreted
+as indicating a range, typically as the first or last
+character in the class.
+
+It is not possible to have the literal character @code{]} as the
+end character of a range. A pattern such as @code{[W-]46]} is
+interpreted as a class of two characters (@code{W} and @code{-})
+followed by a literal string @code{46]}, so it would match
+@samp{W46]} or @samp{-46]}. However, if the @code{]} is escaped
+with a backslash it is interpreted as the end of range, so
+@code{[W-\]46]} is interpreted as a single class containing a
+range followed by two separate characters. The octal or
+hexadecimal representation of @code{]} can also be used to end a range.
+
+Ranges operate in @sc{ascii} collating sequence. They can also be
+used for characters specified numerically, for example
+@code{[\000-\037]}. If a range that includes letters is used when
+caseless matching is set, it matches the letters in either
+case. For example, a caseless @code{[W-c]} is equivalent to
+@code{[][\^_`wxyzabc]}, matched caselessly, and if character
+tables for the French locale are in use, @code{[\xc8-\xcb]}
+matches accented E characters in both cases.
+
+Unlike in @sc{posix} mode, the character types @code{\d},
+@code{\D}, @code{\s}, @code{\S}, @code{\w}, and @code{\W}
+may also appear in a character class, and add the characters
+that they match to the class. For example, @code{[\dABCDEF]} matches any
+hexadecimal digit. A circumflex can conveniently be used
+with the upper case character types to specify a more restricted
+set of characters than the matching lower case type.
+For example, the class @code{[^\W_]} matches any letter or digit,
+but not underscore.
+
+All non-alphameric characters other than @code{\}, @code{-},
+@code{^} (at the start) and the terminating @code{]}
+are non-special in character classes, but it does no harm
+if they are escaped.
+
+Perl 5.6 supports the @sc{posix} notation for character classes, which
+uses names enclosed by @code{[:} and @code{:]} within the enclosing
+square brackets, and @value{SSED} supports this notation as well.
+For example,
+
+@example
+ [01[:alpha:]%]
+@end example
+
+@noindent
+matches @samp{0}, @samp{1}, any alphabetic character, or @samp{%}.
+The supported class names are
+
+@table @code
+@item alnum
+Matches letters and digits
+
+@item alpha
+Matches letters
+
+@item ascii
+Matches character codes 0 - 127
+
+@item cntrl
+Matches control characters
+
+@item digit
+Matches decimal digits (same as \d)
+
+@item graph
+Matches printing characters, excluding space
+
+@item lower
+Matches lower case letters
+
+@item print
+Matches printing characters, including space
+
+@item punct
+Matches printing characters, excluding letters and digits
+
+@item space
+Matches white space (same as \s)
+
+@item upper
+Matches upper case letters
+
+@item word
+Matches ``word'' characters (same as \w)
+
+@item xdigit
+Matches hexadecimal digits
+@end table
+
+The names @code{ascii} and @code{word} are extensions valid only in
+Perl mode. Another Perl extension is negation, which is
+indicated by a circumflex character after the colon. For example,
+
+@example
+ [12[:^digit:]]
+@end example
+
+@noindent
+matches @samp{1}, @samp{2}, or any non-digit.
+
+@node Options setting
+@appendixsec Options setting
+@cindex Perl-style regular expressions, toggling options
+@cindex Perl-style regular expressions, case-insensitive
+@cindex Perl-style regular expressions, multiline
+@cindex Perl-style regular expressions, single line
+@cindex Perl-style regular expressions, extended
+
+The settings of the @code{I}, @code{M}, @code{S}, @code{X}
+modifiers can be changed from within the pattern by
+a sequence of Perl option letters enclosed between @code{(?}
+and @code{)}. The option letters must be lowercase.
+
+For example, @code{(?im)} sets caseless, multiline matching. It is
+also possible to unset these options by preceding the letter
+with a hyphen; you can also have combined settings and unsettings:
+@code{(?im-sx)} sets caseless and multiline matching,
+while unsetting single line matching (for dots) and extended
+whitespace interpretation. If a letter appears both before
+and after the hyphen, the option is unset.
+
+The scope of these option changes depends on where in the
+pattern the setting occurs. For settings that are outside
+any subpattern (defined below), the effect is the same as if
+the options were set or unset at the start of matching. The
+following patterns all behave in exactly the same way:
+
+@example
+ (?i)abc
+ a(?i)bc
+ ab(?i)c
+ abc(?i)
+@end example
+
+which in turn is the same as specifying the pattern abc with
+the @code{I} modifier. In other words, ``top level'' settings
+apply to the whole pattern (unless there are other
+changes inside subpatterns). If there is more than one setting
+of the same option at top level, the rightmost setting
+is used.
+
+If an option change occurs inside a subpattern, the effect
+is different. This is a change of behaviour in Perl 5.005.
+An option change inside a subpattern affects only that part
+of the subpattern @emph{that follows} it, so
+
+@example
+ (a(?i)b)c
+@end example
+
+@noindent
+matches abc and aBc and no other strings (assuming
+case-sensitive matching is used). By this means, options can
+be made to have different settings in different parts of the
+pattern. Any changes made in one alternative do carry on
+into subsequent branches within the same subpattern. For
+example,
+
+@example
+ (a(?i)b|c)
+@end example
+
+@noindent
+matches @samp{ab}, @samp{aB}, @samp{c}, and @samp{C},
+even though when matching @samp{C} the first branch is
+abandoned before the option setting.
+This is because the effects of option settings happen at
+compile time. There would be some very weird behaviour otherwise.
+
+@ignore
+There are two PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA
+that can be changed in the same way as the Perl-compatible options by
+using the characters U and X respectively. The (?X) flag
+setting is special in that it must always occur earlier in
+the pattern than any of the additional features it turns on,
+even when it is at top level. It is best put at the start.
+@end ignore
+
+
+@node Non-capturing subpatterns
+@appendixsec Non-capturing subpatterns
+@cindex Perl-style regular expressions, non-capturing subpatterns
+
+Marking part of a pattern as a subpattern does two things.
+On one hand, it localizes a set of alternatives; on the other
+hand, it sets up the subpattern as a capturing subpattern (as
+defined above). The subpattern can be backreferenced and
+referenced in the right side of @code{s} commands.
+
+For example, if the string @samp{the red king} is matched against
+the pattern
+
+@example
+ the ((red|white) (king|queen))
+@end example
+
+@noindent
+the captured substrings are @samp{red king}, @samp{red},
+and @samp{king}, and are numbered 1, 2, and 3.
+
+The fact that plain parentheses fulfil two functions is not
+always helpful. There are often times when a grouping
+subpattern is required without a capturing requirement. If an
+opening parenthesis is followed by @code{?:}, the subpattern does
+not do any capturing, and is not counted when computing the
+number of any subsequent capturing subpatterns. For example,
+if the string @samp{the white queen} is matched against the pattern
+
+@example
+ the ((?:red|white) (king|queen))
+@end example
+
+@noindent
+the captured substrings are @samp{white queen} and @samp{queen},
+and are numbered 1 and 2. The maximum number of captured
+substrings is 99, while the maximum number of all subpatterns,
+both capturing and non-capturing, is 200.
+
+As a convenient shorthand, if any option settings are
+required at the start of a non-capturing subpattern, the
+option letters may appear between the @code{?} and the
+@code{:}. Thus the two patterns
+
+@example
+ (?i:saturday|sunday)
+ (?:(?i)saturday|sunday)
+@end example
+
+@noindent
+match exactly the same set of strings. Because alternative
+branches are tried from left to right, and options are not
+reset until the end of the subpattern is reached, an option
+setting in one branch does affect subsequent branches, so
+the above patterns match @samp{SUNDAY} as well as @samp{Saturday}.
+
+
+@node Repetition
+@appendixsec Repetition
+@cindex Perl-style regular expressions, repetitions
+
+Repetition is specified by quantifiers, which can follow any
+of the following items:
+
+@itemize @bullet
+@item
+a single character, possibly escaped
+
+@item
+the @code{.} special character
+
+@item
+a character class
+
+@item
+a back reference (see next section)
+
+@item
+a parenthesized subpattern (unless it is an assertion; @pxref{Assertions})
+@end itemize
+
+The general repetition quantifier specifies a minimum and
+maximum number of permitted matches, by giving the two
+numbers in curly brackets (braces), separated by a comma.
+The numbers must be less than 65536, and the first must be
+less than or equal to the second. For example:
+
+@example
+ z@{2,4@}
+@end example
+
+@noindent
+matches @samp{zz}, @samp{zzz}, or @samp{zzzz}. A closing brace on its own
+is not a special character. If the second number is omitted,
+but the comma is present, there is no upper limit; if the
+second number and the comma are both omitted, the quantifier
+specifies an exact number of required matches. Thus
+
+@example
+ [aeiou]@{3,@}
+@end example
+
+@noindent
+matches at least 3 successive vowels, but may match many
+more, while
+
+@example
+ \d@{8@}
+@end example
+
+@noindent
+matches exactly 8 digits. An opening curly bracket that
+appears in a position where a quantifier is not allowed, or
+one that does not match the syntax of a quantifier, is taken
+as a literal character. For example, @{,6@} is not a quantifier,
+but a literal string of four characters.@footnote{It
+raises an error if @option{-R} is not used.}
+
+The quantifier @samp{@{0@}} is permitted, causing the expression to
+behave as if the previous item and the quantifier were not
+present.
+
+For convenience (and historical compatibility) the three
+most common quantifiers have single-character abbreviations:
+
+@table @code
+@item *
+is equivalent to @{0,@}
+
+@item +
+is equivalent to @{1,@}
+
+@item ?
+is equivalent to @{0,1@}
+@end table
+
+It is possible to construct infinite loops by following a
+subpattern that can match no characters with a quantifier
+that has no upper limit, for example:
+
+@example
+ (a?)*
+@end example
+
+Earlier versions of Perl used to give an error at
+compile time for such patterns. However, because there are
+cases where this can be useful, such patterns are now
+accepted, but if any repetition of the subpattern does in
+fact match no characters, the loop is forcibly broken.
+
+@cindex Greedy regular expression matching
+@cindex Perl-style regular expressions, stingy repetitions
+By default, the quantifiers are @dfn{greedy} like in @sc{posix}
+mode, that is, they match as much as possible (up to the maximum
+number of permitted times), without causing the rest of the
+pattern to fail. The classic example of where this gives problems
+is in trying to match comments in C programs. These appear between
+the sequences @code{/*} and @code{*/} and within the sequence, individual
+@code{*} and @code{/} characters may appear. An attempt to match C
+comments by applying the pattern
+
+@example
+ /\*.*\*/
+@end example
+
+@noindent
+to the string
+
+@example
+ /* first comment */ not comment /* second comment */
+@end example
+
+
+@noindent
+fails, because it matches the entire string owing to the
+greediness of the @code{.*} item.
+
+However, if a quantifier is followed by a question mark, it
+ceases to be greedy, and instead matches the minimum number
+of times possible, so the pattern @code{/\*.*?\*/}
+does the right thing with the C comments. The meaning of the
+various quantifiers is not otherwise changed, just the preferred
+number of matches. Do not confuse this use of question
+mark with its use as a quantifier in its own right.
+Because it has two uses, it can sometimes appear doubled, as in
+
+@example
+ \d??\d
+@end example
+
+which matches one digit by preference, but can match two if
+that is the only way the rest of the pattern matches.
+
+Note that greediness does not matter when specifying addresses,
+but can nevertheless be used to improve performance.
+
+@ignore
+ If the PCRE_UNGREEDY option is set (an option which is not
+ available in Perl), the quantifiers are not greedy by
+ default, but individual ones can be made greedy by following
+ them with a question mark. In other words, it inverts the
+ default behaviour.
+@end ignore
+
+When a parenthesized subpattern is quantified with a minimum
+repeat count that is greater than 1 or with a limited maximum,
+more store is required for the compiled pattern, in
+proportion to the size of the minimum or maximum.
+
+@cindex Perl-style regular expressions, single line
+If a pattern starts with @code{.*} or @code{.@{0,@}} and the
+@code{S} modifier is used, the pattern is implicitly anchored,
+because whatever follows will be tried against every character
+position in the subject string, so there is no point in
+retrying the overall match at any position after the first.
+PCRE treats such a pattern as though it were preceded by \A.
+
+When a capturing subpattern is repeated, the value captured
+is the substring that matched the final iteration. For example,
+after
+
+@example
+ (tweedle[dume]@{3@}\s*)+
+@end example
+
+@noindent
+has matched @samp{tweedledum tweedledee} the value of the
+captured substring is @samp{tweedledee}. However, if there are
+nested capturing subpatterns, the corresponding captured
+values may have been set in previous iterations. For example,
+after
+
+@example
+ /(a|(b))+/
+@end example
+
+matches @samp{aba}, the value of the second captured substring is
+@samp{b}.
+
+@node Backreferences
+@appendixsec Backreferences
+@cindex Perl-style regular expressions, backreferences
+
+Outside a character class, a backslash followed by a digit
+greater than 0 (and possibly further digits) is a back
+reference to a capturing subpattern earlier (i.e. to its
+left) in the pattern, provided there have been that many
+previous capturing left parentheses.
+
+However, if the decimal number following the backslash is
+less than 10, it is always taken as a back reference, and
+causes an error only if there are not that many capturing
+left parentheses in the entire pattern. In other words, the
+parentheses that are referenced need not be to the left of
+the reference for numbers less than 10. @xref{Backslash},
+for further details of the handling of digits following a backslash.
+
+A back reference matches whatever actually matched the capturing
+subpattern in the current subject string, rather than
+anything matching the subpattern itself. So the pattern
+
+@example
+ (sens|respons)e and \1ibility
+@end example
+
+@noindent
+matches @samp{sense and sensibility} and @samp{response and responsibility},
+but not @samp{sense and responsibility}. If caseful
+matching is in force at the time of the back reference, the
+case of letters is relevant. For example,
+
+@example
+ ((?i)blah)\s+\1
+@end example
+
+@noindent
+matches @samp{blah blah} and @samp{Blah Blah}, but not
+@samp{BLAH blah}, even though the original capturing
+subpattern is matched caselessly.
+
+There may be more than one back reference to the same subpattern.
+Also, if a subpattern has not actually been used in a
+particular match, any back references to it always fail. For
+example, the pattern
+
+@example
+ (a|(bc))\2
+@end example
+
+@noindent
+always fails if it starts to match @samp{a} rather than
+@samp{bc}. Because there may be up to 99 back references, all
+digits following the backslash are taken as part of a potential
+back reference number; this is different from what happens
+in @sc{posix} mode. If the pattern continues with a digit
+character, some delimiter must be used to terminate the back
+reference. If the @code{X} modifier option is set, this can be
+whitespace. Otherwise an empty comment can be used, or the
+following character can be expressed in hexadecimal or octal.
+
+A back reference that occurs inside the parentheses to which
+it refers fails when the subpattern is first used, so, for
+example, @code{(a\1)} never matches. However, such references
+can be useful inside repeated subpatterns. For example, the
+pattern
+
+@example
+ (a|b\1)+
+@end example
+
+@noindent
+matches any number of @samp{a}s and also @samp{aba}, @samp{ababbaa},
+etc. At each iteration of the subpattern, the back reference matches
+the character string corresponding to the previous iteration. In
+order for this to work, the pattern must be such that the first
+iteration does not need to match the back reference. This can be
+done using alternation, as in the example above, or by a
+quantifier with a minimum of zero.
+
+@node Assertions
+@appendixsec Assertions
+@cindex Perl-style regular expressions, assertions
+@cindex Perl-style regular expressions, asserting subpatterns
+
+An assertion is a test on the characters following or
+preceding the current matching point that does not actually
+consume any characters. The simple assertions coded as @code{\b},
+@code{\B}, @code{\A}, @code{\Z}, @code{\z}, @code{^} and @code{$}
+are described above. More complicated assertions are coded as
+subpatterns. There are two kinds: those that look ahead of the
+current position in the subject string, and those that look behind it.
+
+@cindex Perl-style regular expressions, lookahead subpatterns
+An assertion subpattern is matched in the normal way, except
+that it does not cause the current matching position to be
+changed. Lookahead assertions start with @code{(?=} for positive
+assertions and @code{(?!} for negative assertions. For example,
+
+@example
+ \w+(?=;)
+@end example
+
+@noindent
+matches a word followed by a semicolon, but does not include
+the semicolon in the match, and
+
+@example
+ foo(?!bar)
+@end example
+
+@noindent
+matches any occurrence of @samp{foo} that is not followed by
+@samp{bar}.
+
+Note that the apparently similar pattern
+
+@example
+ (?!foo)bar
+@end example
+
+@noindent
+@cindex Perl-style regular expressions, lookbehind subpatterns
+finds any occurrence of @samp{bar} even if it is preceded by
+@samp{foo}, because the assertion @code{(?!foo)} is always true
+when the next three characters are @samp{bar}. A lookbehind
+assertion is needed to achieve this effect.
+Lookbehind assertions start with @code{(?<=} for positive
+assertions and @code{(?<!} for negative assertions. So,
+
+@example
+ (?<!foo)bar
+@end example
+
+achieves the required effect of finding an occurrence of
+@samp{bar} that is not preceded by @samp{foo}. The contents of a
+lookbehind assertion are restricted
+such that all the strings it matches must have a fixed
+length. However, if there are several alternatives, they do
+not all have to have the same fixed length. This is an extension
+compared with Perl 5.005, which requires all branches to match
+the same length of string. Thus
+
+@example
+ (?<=dogs|cats|)
+@end example
+
+@noindent
+is permitted, but the apparently equivalent regular expression
+
+@example
+ (?<!dogs?|cats?)
+@end example
+
+@noindent
+causes an error at compile time. Branches that match different
+length strings are permitted only at the top level of
+a lookbehind assertion: an assertion such as
+
+@example
+ (?<=ab(c|de))
+@end example
+
+@noindent
+is not permitted, because its single top-level branch can
+match two different lengths, but it is acceptable if rewritten
+to use two top-level branches:
+
+@example
+ (?<=abc|abde)
+@end example
+
+All this is required because lookbehind assertions simply
+move the current position back by the alternative's fixed
+width and then try to match. If there are
+insufficient characters before the current position, the
+match is deemed to fail. Lookbehinds, in conjunction with
+non-backtracking subpatterns, can be particularly useful for
+matching at the ends of strings; an example is given at the end
+of the section on non-backtracking subpatterns.
+
+Several assertions (of any sort) may occur in succession.
+For example,
+
+@example
+ (?<=\d@{3@})(?<!999)foo
+@end example
+
+@noindent
+matches @samp{foo} preceded by three digits that are not @samp{999}.
+Notice that each of the assertions is applied independently
+at the same point in the subject string. First there is a
+check that the previous three characters are all digits, and
+then there is a check that the same three characters are not
+@samp{999}. This pattern does not match @samp{foo} preceded by six
+characters, the first three of which are digits and the last three
+of which are not @samp{999}. For example, it doesn't match
+@samp{123abcfoo}. A pattern to do that is
+
+@example
+ (?<=\d@{3@}...)(?<!999)foo
+@end example
+
+@noindent
+This time the first assertion looks at the preceding six
+characters, checking that the first three are digits, and
+then the second assertion checks that the preceding three
+characters are not @samp{999}. Actually, assertions can be
+nested in any combination, so one can write this as
+
+@example
+ (?<=\d@{3@}(?!999)...)foo
+@end example
+
+or
+
+@example
+ (?<=\d@{3@}...(?<!999))foo
+@end example
+
+@noindent
+both of which might be considered more readable.
+
+Assertion subpatterns are not capturing subpatterns, and may
+not be repeated, because it makes no sense to assert the
+same thing several times. If any kind of assertion contains
+capturing subpatterns within it, these are counted for the
+purposes of numbering the capturing subpatterns in the whole
+pattern. However, substring capturing is carried out only
+for positive assertions, because it does not make sense for
+negative assertions.
+
+Assertions count towards the maximum of 200 parenthesized
+subpatterns.
+
+@node Non-backtracking subpatterns
+@appendixsec Non-backtracking subpatterns
+@cindex Perl-style regular expressions, non-backtracking subpatterns
+
+With both maximizing and minimizing repetition, failure of
+what follows normally causes the repeated item to be evaluated
+again to see if a different number of repeats allows the
+rest of the pattern to match. Sometimes it is useful to
+prevent this, either to change the nature of the match, or
+to cause it to fail earlier than it otherwise might, when the
+author of the pattern knows there is no point in carrying
+on.
+
+Consider, for example, the pattern @code{\d+foo} when applied to
+the subject line
+
+@example
+ 123456bar
+@end example
+
+After matching all 6 digits and then failing to match @samp{foo},
+the normal action of the matcher is to try again with only 5
+digits matching the @code{\d+} item, and then with 4, and so on,
+before ultimately failing. Non-backtracking subpatterns
+provide the means for specifying that once a portion of the
+pattern has matched, it is not to be re-evaluated in this way,
+so the matcher would give up immediately on failing to match
+@samp{foo} the first time. The notation is another kind of special
+parenthesis, starting with @code{(?>} as in this example:
+
+@example
+ (?>\d+)foo
+@end example
+
+This kind of parenthesis ``locks up'' the part of the pattern
+it contains once it has matched, and a failure further into
+the pattern is prevented from backtracking into it.
+Backtracking past it to previous items, however, works as
+normal.
+
+Non-backtracking subpatterns are not capturing subpatterns. Simple
+cases such as the above example can be thought of as a maximizing
+repeat that must swallow everything it can. So,
+while both @code{\d+} and @code{\d+?} are prepared to adjust the number of
+digits they match in order to make the rest of the pattern
+match, @code{(?>\d+)} can only match an entire sequence of digits.
+
+This construction can of course contain arbitrarily complicated
+subpatterns, and it can be nested.
+
+@cindex Perl-style regular expressions, lookbehind subpatterns
+Non-backtracking subpatterns can be used in conjunction with look-behind
+assertions to specify efficient matching at the end
+of the subject string. Consider a simple pattern such as
+
+@example
+ abcd$
+@end example
+
+@noindent
+when applied to a long string which does not match. Because
+matching proceeds from left to right, @command{sed} will look for
+each @samp{a} in the subject and then see if what follows matches
+the rest of the pattern. If the pattern is specified as
+
+@example
+ ^.*abcd$
+@end example
+
+@noindent
+the initial @code{.*} matches the entire string at first, but when
+this fails (because there is no following @samp{a}), it backtracks
+to match all but the last character, then all but the
+last two characters, and so on. Once again the search for
+@samp{a} covers the entire string, from right to left, so we are
+no better off. However, if the pattern is written as
+
+@example
+ ^(?>.*)(?<=abcd)
+@end example
+
+there can be no backtracking for the @code{.*} item; it can match
+only the entire string. The subsequent lookbehind assertion
+does a single test on the last four characters. If it fails,
+the match fails immediately. For long strings, this approach
+makes a significant difference to the processing time.
+
+When a pattern contains an unlimited repeat inside a subpattern
+that can itself be repeated an unlimited number of
+times, the use of a non-backtracking subpattern is the only way to
+avoid some failing matches taking a very long time
+indeed.@footnote{Actually, the matcher embedded in @value{SSED}
+ tries to do something for this in the simplest cases,
+ like @code{([^b]*b)*}. These cases are actually quite
+ common: they happen for example in a regular expression
+ like @code{\/\*([^*]*\*)*\/} which matches C comments.}
+
+The pattern
+
+@example
+ (\D+|<\d+>)*[!?]
+@end example
+
+@c FIXME: stray pattern left between the example and its description: ([^0-9<]+<(\d+>)?)*[!?]
+
+@noindent
+matches an unlimited number of substrings that either consist
+of non-digits, or digits enclosed in angle brackets, followed by
+an exclamation or question mark. When it matches, it runs quickly.
+However, if it is applied to
+
+@example
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@end example
+
+@noindent
+it takes a long time before reporting failure. This is
+because the string can be divided between the two repeats in
+a large number of ways, and all have to be tried.@footnote{The
+example used @code{[!?]} rather than a single character at the end,
+because both @value{SSED} and Perl have an optimization that allows
+for fast failure when a single character is used. They
+remember the last single character that is required for a
+match, and fail early if it is not present in the string.}
+
+If the pattern is changed to
+
+@example
+ ((?>\D+)|<\d+>)*[!?]
+@end example
+
+sequences of non-digits cannot be broken, and failure happens
+quickly.
+
+@node Conditional subpatterns
+@appendixsec Conditional subpatterns
+@cindex Perl-style regular expressions, conditional subpatterns
+
+It is possible to cause the matching process to obey a subpattern
+conditionally or to choose between two alternative
+subpatterns, depending on the result of an assertion, or
+whether a previous capturing subpattern matched or not. The
+two possible forms of conditional subpattern are
+
+@example
+ (?(@var{condition})@var{yes-pattern})
+ (?(@var{condition})@var{yes-pattern}|@var{no-pattern})
+@end example
+
+If the condition is satisfied, the yes-pattern is used; otherwise
+the no-pattern (if present) is used. If there are more than two
+alternatives in the subpattern, a compile-time error occurs.
+
+There are two kinds of condition. If the text between the
+parentheses consists of a sequence of digits, the condition
+is satisfied if the capturing subpattern of that number has
+previously matched. The number must be greater than zero.
+Consider the following pattern, which contains non-significant
+white space to make it more readable (assume the @code{X} modifier)
+and to divide it into three parts for ease of discussion:
+
+@example
+ ( \( )? [^()]+ (?(1) \) )
+@end example
+
+The first part matches an optional opening parenthesis, and
+if that character is present, sets it as the first captured
+substring. The second part matches one or more characters
+that are not parentheses. The third part is a conditional
+subpattern that tests whether the first set of parentheses
+matched or not. If they did, that is, if the subject started
+with an opening parenthesis, the condition is true, and so
+the yes-pattern is executed and a closing parenthesis is
+required. Otherwise, since no-pattern is not present, the
+subpattern matches nothing. In other words, this pattern
+matches a sequence of non-parentheses, optionally enclosed
+in parentheses.
+
+@cindex Perl-style regular expressions, lookahead subpatterns
+If the condition is not a sequence of digits, it must be an
+assertion. This may be a positive or negative lookahead or
+lookbehind assertion. Consider this pattern, again containing
+non-significant white space, and with the two alternatives
+on the second and third lines:
+
+@example
+ (?(?=...[a-z])
+ \d\d-[a-z]@{3@}-\d\d |
+ \d\d-\d\d-\d\d )
+@end example
+
+The condition is a positive lookahead assertion that matches
+a letter that is three characters away from the current point.
+If a letter is found, the subject is matched against the first
+alternative @samp{@var{dd}-@var{aaa}-@var{dd}} (where @var{aaa} are
+letters and @var{dd} are digits); otherwise it is matched against
+the second alternative, @samp{@var{dd}-@var{dd}-@var{dd}}.
+
+
+@node Recursive patterns
+@appendixsec Recursive patterns
+@cindex Perl-style regular expressions, recursive patterns
+@cindex Perl-style regular expressions, recursion
+
+Consider the problem of matching a string in parentheses,
+allowing for unlimited nested parentheses. Without the use
+of recursion, the best that can be done is to use a pattern
+that matches up to some fixed depth of nesting. It is not
+possible to handle an arbitrary nesting depth. Perl 5.6 has
+provided an experimental facility that allows regular
+expressions to recurse (amongst other things). It does this
+by interpolating Perl code in the expression at run time,
+and the code can refer to the expression itself. A Perl
+pattern to solve the parentheses problem can be created like
+this:
+
+@example
+ $re = qr@{\( (?: (?>[^()]+) | (?p@{$re@}) )* \)@}x;
+@end example
+
+The @code{(?p@{...@})} item interpolates Perl code at run time,
+and in this case refers recursively to the pattern in which it
+appears. Obviously, @command{sed} cannot support the interpolation of
+Perl code. Instead, the special item @code{(?R)} is provided for
+the specific case of recursion. This pattern solves the
+parentheses problem (assume the @code{X} modifier option is used
+so that white space is ignored):
+
+@example
+ \( ( (?>[^()]+) | (?R) )* \)
+@end example
+
+First it matches an opening parenthesis. Then it matches any
+number of substrings which can either be a sequence of
+non-parentheses, or a recursive match of the pattern itself
+(i.e. a correctly parenthesized substring). Finally there is
+a closing parenthesis.
+
+This particular example pattern contains nested unlimited
+repeats, and so the use of a non-backtracking subpattern for
+matching strings of non-parentheses is important when applying
+the pattern to strings that do not match. For example, when
+it is applied to
+
+@example
+ (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+@end example
+
+it yields a ``no match'' response quickly. However, if a
+non-backtracking subpattern is not used, the match runs
+for a very long time indeed because there are so many different
+ways the @code{+} and @code{*} repeats can carve up the subject,
+and all have to be tested before failure can be reported.
+
+The values set for any capturing subpatterns are those from
+the outermost level of the recursion at which the subpattern
+value is set. If the pattern above is matched against
+
+@example
+ (ab(cd)ef)
+@end example
+
+@noindent
+the value for the capturing parentheses is @samp{ef}, which is
+the last value taken on at the top level.
+
+@node Comments
+@appendixsec Comments
+@cindex Perl-style regular expressions, comments
+
+The sequence @code{(?#} marks the start of a comment which
+continues up to the next closing parenthesis. Nested parentheses
+are not permitted. The characters that make up a comment
+play no part in the pattern matching at all.
+
+@cindex Perl-style regular expressions, extended
+If the @code{X} modifier option is used, an unescaped @code{#} character
+outside a character class introduces a comment that continues
+up to the next newline character in the pattern.
+@end ifset
+
+
+@page
+@node Concept Index
+@unnumbered Concept Index
+
+This is a general index of all issues discussed in this manual, with the
+exception of the @command{sed} commands and command-line options.
+
+@printindex cp
+
+@page
+@node Command and Option Index
+@unnumbered Command and Option Index
+
+This is an alphabetical list of all @command{sed} commands and command-line
+options.
+
+@printindex fn
+
+@contents
+@bye
+
+@c XXX FIXME: the term "cycle" is never defined...
diff --git a/doc/sed.1 b/doc/sed.1
new file mode 100644
index 0000000..2b9c88c
--- /dev/null
+++ b/doc/sed.1
@@ -0,0 +1,374 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.28.
+.TH SED "1" "August 2004" "sed version 4.1.2" "User Commands"
+.SH NAME
+sed \- manual page for sed version 4.1.2
+.SH SYNOPSIS
+.B sed
+[\fIOPTION\fR]... \fI{script-only-if-no-other-script} \fR[\fIinput-file\fR]...
+.SH DESCRIPTION
+.ds sd \fIsed\fP
+.ds Sd \fISed\fP
+\*(Sd is a stream editor.
+A stream editor is used to perform basic text
+transformations on an input stream
+(a file or input from a pipeline).
+While in some ways similar to an editor which
+permits scripted edits (such as \fIed\fP),
+\*(sd works by making only one pass over the
+input(s), and is consequently more efficient.
+But it is \*(sd's ability to filter text in a pipeline
+which particularly distinguishes it from other types of
+editors.
+.HP
+\fB\-n\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR
+.IP
+suppress automatic printing of pattern space
+.HP
+\fB\-e\fR script, \fB\-\-expression\fR=\fIscript\fR
+.IP
+add the script to the commands to be executed
+.HP
+\fB\-f\fR script-file, \fB\-\-file\fR=\fIscript\-file\fR
+.IP
+add the contents of script-file to the commands to be executed
+.HP
+\fB\-i[SUFFIX]\fR, \fB\-\-in\-place\fR[=\fISUFFIX\fR]
+.IP
+edit files in place (makes backup if extension supplied)
+.HP
+\fB\-l\fR N, \fB\-\-line\-length\fR=\fIN\fR
+.IP
+specify the desired line-wrap length for the `l' command
+.HP
+\fB\-\-posix\fR
+.IP
+disable all GNU extensions.
+.HP
+\fB\-r\fR, \fB\-\-regexp\-extended\fR
+.IP
+use extended regular expressions in the script.
+.HP
+\fB\-s\fR, \fB\-\-separate\fR
+.IP
+consider files as separate rather than as a single continuous
+long stream.
+.HP
+\fB\-u\fR, \fB\-\-unbuffered\fR
+.IP
+load minimal amounts of data from the input files and flush
+the output buffers more often
+.TP
+\fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-\-version\fR
+output version information and exit
+.PP
+If no \fB\-e\fR, \fB\-\-expression\fR, \fB\-f\fR, or \fB\-\-file\fR option is given, then the first
+non-option argument is taken as the sed script to interpret. All
+remaining arguments are names of input files; if no input files are
+specified, then the standard input is read.
+.PP
+E-mail bug reports to: bonzini@gnu.org .
+Be sure to include the word ``sed'' somewhere in the ``Subject:'' field.
+.SH "COMMAND SYNOPSIS"
+This is just a brief synopsis of \*(sd commands to serve as
+a reminder to those who already know \*(sd;
+other documentation (such as the texinfo document)
+must be consulted for fuller descriptions.
+.SS
+Zero-address ``commands''
+.TP
+.RI :\ label
+Label for
+.B b
+and
+.B t
+commands.
+.TP
+.RI # comment
+The comment extends until the next newline (or the end of a
+.B -e
+script fragment).
+.TP
+}
+The closing bracket of a { } block.
+.SS
+Zero- or One- address commands
+.TP
+=
+Print the current line number.
+.TP
+a \e
+.TP
+.I text
+Append
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+i \e
+.TP
+.I text
+Insert
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+q
+Immediately quit the \*(sd script without processing
+any more input,
+except that if auto-print is not disabled
+the current pattern space will be printed.
+.TP
+Q
+Immediately quit the \*(sd script without processing
+any more input.
+.TP
+.RI r\ filename
+Append text read from
+.IR filename .
+.TP
+.RI R\ filename
+Append a line read from
+.IR filename .
+.SS
+Commands which accept address ranges
+.TP
+{
+Begin a block of commands (end with a }).
+.TP
+.RI b\ label
+Branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+.RI t\ label
+If a s/// has done a successful substitution since the
+last input line was read and since the last t or T
+command, then branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+.RI T\ label
+If no s/// has done a successful substitution since the
+last input line was read and since the last t or T
+command, then branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+c \e
+.TP
+.I text
+Replace the selected lines with
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+d
+Delete pattern space.
+Start next cycle.
+.TP
+D
+Delete up to the first embedded newline in the pattern space.
+Start next cycle, but skip reading from the input
+if there is still data in the pattern space.
+.TP
+h H
+Copy/append pattern space to hold space.
+.TP
+g G
+Copy/append hold space to pattern space.
+.TP
+x
+Exchange the contents of the hold and pattern spaces.
+.TP
+l
+List out the current line in a ``visually unambiguous'' form.
+.TP
+n N
+Read/append the next line of input into the pattern space.
+.TP
+p
+Print the current pattern space.
+.TP
+P
+Print up to the first embedded newline of the current pattern space.
+.TP
+.RI s/ regexp / replacement /
+Attempt to match
+.I regexp
+against the pattern space.
+If successful, replace that portion matched
+with
+.IR replacement .
+The
+.I replacement
+may contain the special character
+.B &
+to refer to that portion of the pattern space which matched,
+and the special escapes \e1 through \e9 to refer to the
+corresponding matching sub-expressions in the
+.IR regexp .
+.TP
+.RI w\ filename
+Write the current pattern space to
+.IR filename .
+.TP
+.RI W\ filename
+Write the first line of the current pattern space to
+.IR filename .
+.TP
+.RI y/ source / dest /
+Transliterate the characters in the pattern space which appear in
+.I source
+to the corresponding character in
+.IR dest .
+.SH
+Addresses
+\*(Sd commands can be given with no addresses, in which
+case the command will be executed for all input lines;
+with one address, in which case the command will only be executed
+for input lines which match that address; or with two
+addresses, in which case the command will be executed
+for all input lines which match the inclusive range of
+lines starting from the first address and continuing to
+the second address.
+Three things to note about address ranges:
+the syntax is
+.IR addr1 , addr2
+(i.e., the addresses are separated by a comma);
+the line which
+.I addr1
+matched will always be accepted,
+even if
+.I addr2
+selects an earlier line;
+and if
+.I addr2
+is a
+.IR regexp ,
+it will not be tested against the line that
+.I addr1
+matched.
+.PP
+After the address (or address-range),
+and before the command, a
+.B !
+may be inserted,
+which specifies that the command shall only be
+executed if the address (or address-range) does
+.B not
+match.
+.PP
+The following address types are supported:
+.TP
+.I number
+Match only the specified line
+.IR number .
+.TP
+.IR first ~ step
+Match every
+.IR step 'th
+line starting with line
+.IR first .
+For example, ``sed -n 1~2p'' will print all the odd-numbered lines in
+the input stream, and the address 2~5 will match every fifth line,
+starting with the second. (This is an extension.)
+.TP
+$
+Match the last line.
+.TP
+.RI / regexp /
+Match lines matching the regular expression
+.IR regexp .
+.TP
+.BI \fR\e\fPc regexp c
+Match lines matching the regular expression
+.IR regexp .
+The
+.B c
+may be any character.
+.PP
+GNU \*(sd also supports some special 2-address forms:
+.TP
+.RI 0, addr2
+Start out in "matched first address" state, until
+.I addr2
+is found.
+This is similar to
+.RI 1, addr2 ,
+except that if
+.I addr2
+matches the very first line of input the
+.RI 0, addr2
+form will be at the end of its range, whereas the
+.RI 1, addr2
+form will still be at the beginning of its range.
+.TP
+.IR addr1 ,+ N
+Will match
+.I addr1
+and the
+.I N
+lines following
+.IR addr1 .
+.TP
+.IR addr1 ,~ N
+Will match
+.I addr1
+and the lines following
+.I addr1
+until the next line whose input line number is a multiple of
+.IR N .
+.SH "REGULAR EXPRESSIONS"
+POSIX.2 BREs
+.I should
+be supported, but they aren't completely because of performance
+problems.
+The
+.B \en
+sequence in a regular expression matches the newline character,
+and similarly for
+.BR \ea ,
+.BR \et ,
+and other sequences.
+.SH BUGS
+.PP
+E-mail bug reports to
+.BR bonzini@gnu.org .
+Be sure to include the word ``sed'' somewhere in the ``Subject:'' field.
+Also, please include the output of ``sed --version'' in the body
+of your report if at all possible.
+.SH COPYRIGHT
+Copyright \(co 2003 Free Software Foundation, Inc.
+.br
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,
+to the extent permitted by law.
+.SH "SEE ALSO"
+.BR awk (1),
+.BR ed (1),
+.BR grep (1),
+.BR tr (1),
+.BR perlre (1),
+sed.info,
+any of various books on \*(sd,
+.na
+the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.html),
+http://sed.sf.net/grabbag/.
+.PP
+The full documentation for
+.B sed
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B sed
+programs are properly installed at your site, the command
+.IP
+.B info sed
+.PP
+should give you access to the complete manual.
diff --git a/doc/sed.texi b/doc/sed.texi
new file mode 100644
index 0000000..1225c51
--- /dev/null
+++ b/doc/sed.texi
@@ -0,0 +1,4193 @@
+\input texinfo @c -*-texinfo-*-
+@c Do not edit this file!! It is automatically generated from sed-in.texi.
+@c
+@c -- Stuff that needs adding: ----------------------------------------------
+@c (document the `;' command-separator)
+@c --------------------------------------------------------------------------
+@c Check for consistency: regexps in @code, text that they match in @samp.
+@c
+@c Tips:
+@c @command for command
+@c @samp for command fragments: @samp{cat -s}
+@c @code for sed commands and flags
+@c Use ``quote'' not `quote' or "quote".
+@c
+@c %**start of header
+@setfilename sed.info
+@settitle sed, a stream editor
+@c %**end of header
+
+@c @smallbook
+
+@include version.texi
+
+@c Combine indices.
+@syncodeindex ky cp
+@syncodeindex pg cp
+@syncodeindex tp cp
+
+@defcodeindex op
+@syncodeindex op fn
+
+@include config.texi
+
+@copying
+This file documents version @value{VERSION} of
+@value{SSED}, a stream editor.
+
+Copyright @copyright{} 1998, 1999, 2001, 2002, 2003, 2004 Free
+Software Foundation, Inc.
+
+This document is released under the terms of the @acronym{GNU} Free
+Documentation License as published by the Free Software Foundation;
+either version 1.1, or (at your option) any later version.
+
+You should have received a copy of the @acronym{GNU} Free Documentation
+License along with @value{SSED}; see the file @file{COPYING.DOC}.
+If not, write to the Free Software Foundation, 59 Temple Place - Suite
+330, Boston, MA 02111-1307, USA.
+
+There are no Cover Texts and no Invariant Sections; this text, along
+with its equivalent in the printed manual, constitutes the Title Page.
+@end copying
+
+@setchapternewpage off
+
+@titlepage
+@title @command{sed}, a stream editor
+@subtitle version @value{VERSION}, @value{UPDATED}
+@author by Ken Pizzini, Paolo Bonzini
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc.
+
+@insertcopying
+
+Published by the Free Software Foundation, @*
+59 Temple Place - Suite 330 @*
+Boston, MA 02111-1307, USA
+@end titlepage
+
+
+@node Top
+@top
+
+@ifnottex
+@insertcopying
+@end ifnottex
+
+@menu
+* Introduction:: Introduction
+* Invoking sed:: Invocation
+* sed Programs:: @command{sed} programs
+* Examples:: Some sample scripts
+* Limitations:: Limitations and (non-)limitations of @value{SSED}
+* Other Resources:: Other resources for learning about @command{sed}
+* Reporting Bugs:: Reporting bugs
+
+* Extended regexps:: @command{egrep}-style regular expressions
+@ifset PERL
+* Perl regexps:: Perl-style regular expressions
+@end ifset
+
+* Concept Index:: A menu with all the topics in this manual.
+* Command and Option Index:: A menu with all @command{sed} commands and
+ command-line options.
+
+@detailmenu
+--- The detailed node listing ---
+
+sed Programs:
+* Execution Cycle:: How @command{sed} works
+* Addresses:: Selecting lines with @command{sed}
+* Regular Expressions:: Overview of regular expression syntax
+* Common Commands:: Often used commands
+* The "s" Command:: @command{sed}'s Swiss Army Knife
+* Other Commands:: Less frequently used commands
+* Programming Commands:: Commands for @command{sed} gurus
+* Extended Commands:: Commands specific to @value{SSED}
+* Escapes:: Specifying special characters
+
+Examples:
+* Centering lines::
+* Increment a number::
+* Rename files to lower case::
+* Print bash environment::
+* Reverse chars of lines::
+* tac:: Reverse lines of files
+* cat -n:: Numbering lines
+* cat -b:: Numbering non-blank lines
+* wc -c:: Counting chars
+* wc -w:: Counting words
+* wc -l:: Counting lines
+* head:: Printing the first lines
+* tail:: Printing the last lines
+* uniq:: Make duplicate lines unique
+* uniq -d:: Print duplicated lines of input
+* uniq -u:: Remove all duplicated lines
+* cat -s:: Squeezing blank lines
+
+@ifset PERL
+Perl regexps:: Perl-style regular expressions
+* Backslash:: Introduces special sequences
+* Circumflex/dollar sign/period:: Behave specially with regard to new lines
+* Square brackets:: Are a bit different in strange cases
+* Options setting:: Toggle modifiers in the middle of a regexp
+* Non-capturing subpatterns:: Are not counted when backreferencing
+* Repetition:: Allows for non-greedy matching
+* Backreferences:: Allows for more than 10 back references
+* Assertions:: Allows for complex look ahead matches
+* Non-backtracking subpatterns:: Often gives more performance
+* Conditional subpatterns:: Allows if/then/else branches
+* Recursive patterns:: For example to match parentheses
+* Comments:: Because things can get complex...
+@end ifset
+
+@end detailmenu
+@end menu
+
+
+@node Introduction
+@chapter Introduction
+
+@cindex Stream editor
+@command{sed} is a stream editor.
+A stream editor is used to perform basic text
+transformations on an input stream
+(a file or input from a pipeline).
+While in some ways similar to an editor which
+permits scripted edits (such as @command{ed}),
+@command{sed} works by making only one pass over the
+input(s), and is consequently more efficient.
+But it is @command{sed}'s ability to filter text in a pipeline
+which particularly distinguishes it from other types of
+editors.
+
+
+@node Invoking sed
+@chapter Invocation
+
+Normally @command{sed} is invoked like this:
+
+@example
+sed SCRIPT INPUTFILE...
+@end example
+
+The full format for invoking @command{sed} is:
+
+@example
+sed OPTIONS... [SCRIPT] [INPUTFILE...]
+@end example
+
+If you do not specify @var{INPUTFILE}, or if @var{INPUTFILE} is @file{-},
+@command{sed} filters the contents of the standard input. The @var{script}
+is actually the first non-option parameter, which @command{sed} specially
+considers a script and not an input file if (and only if) none of the
+other @var{options} specifies a script to be executed, that is if neither
+of the @option{-e} and @option{-f} options is specified.
+
+@command{sed} may be invoked with the following command-line options:
+
+@table @code
+@item --version
+@opindex --version
+@cindex Version, printing
+Print out the version of @command{sed} that is being run and a copyright notice,
+then exit.
+
+@item --help
+@opindex --help
+@cindex Usage summary, printing
+Print a usage message briefly summarizing these command-line options
+and the bug-reporting address,
+then exit.
+
+@item -n
+@itemx --quiet
+@itemx --silent
+@opindex -n
+@opindex --quiet
+@opindex --silent
+@cindex Disabling autoprint, from command line
+By default, @command{sed} prints out the pattern space
+at the end of each cycle through the script.
+These options disable this automatic printing,
+and @command{sed} only produces output when explicitly told to
+via the @code{p} command.
+
+@item -i[@var{SUFFIX}]
+@itemx --in-place[=@var{SUFFIX}]
+@opindex -i
+@opindex --in-place
+@cindex In-place editing, activating
+@cindex @value{SSEDEXT}, in-place editing
+This option specifies that files are to be edited in-place.
+@value{SSED} does this by creating a temporary file and
+sending output to this file rather than to the standard
+output.@footnote{This applies to commands such as @code{=},
+@code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can
+still write to the standard output by using the @code{w}
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+or @code{W} commands together with the @file{/dev/stdout}
+special file.}
+
+This option implies @option{-s}.
+
+When the end of the file is reached, the temporary file is
+renamed to the output file's original name. The extension,
+if supplied, is used to modify the name of the old file
+before renaming the temporary file, thereby making a backup
+copy@footnote{Note that @value{SSED} creates the backup
+ file whether or not any output is actually changed.}.
+
+@cindex In-place editing, Perl-style backup file names
+This rule is followed: if the extension doesn't contain a @code{*},
+then it is appended to the end of the current filename as a
+suffix; if the extension does contain one or more @code{*}
+characters, then @emph{each} asterisk is replaced with the
+current filename. This allows you to add a prefix to the
+backup file, instead of (or in addition to) a suffix, or
+even to place backup copies of the original files into another
+directory (provided the directory already exists).
+
+If no extension is supplied, the original file is
+overwritten without making a backup.
+
+@item -l @var{N}
+@itemx --line-length=@var{N}
+@opindex -l
+@opindex --line-length
+@cindex Line length, setting
+Specify the default line-wrap length for the @code{l} command.
+A length of 0 (zero) means to never wrap long lines. If
+not specified, it is taken to be 70.
+
+@item --posix
+@cindex @value{SSEDEXT}, disabling
+@value{SSED} includes several extensions to @acronym{POSIX}
+sed. In order to simplify writing portable scripts, this
+option disables all the extensions that this manual documents,
+including additional commands.
+@cindex @code{POSIXLY_CORRECT} behavior, enabling
+Most of the extensions accept @command{sed} programs that
+are outside the syntax mandated by @acronym{POSIX}, but some
+of them (such as the behavior of the @command{N} command
+described in @ref{Reporting Bugs}) actually violate the
+standard. If you want to disable only the latter kind of
+extension, you can set the @code{POSIXLY_CORRECT} variable
+to a non-empty value.
+
+@item -r
+@itemx --regexp-extended
+@opindex -r
+@opindex --regexp-extended
+@cindex Extended regular expressions, choosing
+@cindex @acronym{GNU} extensions, extended regular expressions
+Use extended regular expressions rather than basic
+regular expressions. Extended regexps are those that
+@command{egrep} accepts; they can be clearer because they
+usually have fewer backslashes, but are a @acronym{GNU} extension
+and hence scripts that use them are not portable.
+@xref{Extended regexps, , Extended regular expressions}.
+
+@ifset PERL
+@item -R
+@itemx --regexp-perl
+@opindex -R
+@opindex --regexp-perl
+@cindex Perl-style regular expressions, choosing
+@cindex @value{SSEDEXT}, Perl-style regular expressions
+Use Perl-style regular expressions rather than basic
+regular expressions. Perl-style regexps are extremely
+powerful but are a @value{SSED} extension and hence scripts that
+use them are not portable. @xref{Perl regexps, ,
+Perl-style regular expressions}.
+@end ifset
+
+@item -s
+@itemx --separate
+@cindex Working on separate files
+By default, @command{sed} will consider the files specified on the
+command line as a single continuous long stream. This @value{SSED}
+extension allows the user to consider them as separate files:
+range addresses (such as @samp{/abc/,/def/}) are not allowed
+to span several files, line numbers are relative to the start
+of each file, @code{$} refers to the last line of each file,
+and files invoked from the @code{R} commands are rewound at the
+start of each file.
+
+@item -u
+@itemx --unbuffered
+@opindex -u
+@opindex --unbuffered
+@cindex Unbuffered I/O, choosing
+Buffer both input and output as minimally as practical.
+(This is particularly useful if the input is coming from
+the likes of @samp{tail -f}, and you wish to see the transformed
+output as soon as possible.)
+
+@item -e @var{script}
+@itemx --expression=@var{script}
+@opindex -e
+@opindex --expression
+@cindex Script, from command line
+Add the commands in @var{script} to the set of commands to be
+run while processing the input.
+
+@item -f @var{script-file}
+@itemx --file=@var{script-file}
+@opindex -f
+@opindex --file
+@cindex Script, from a file
+Add the commands contained in the file @var{script-file}
+to the set of commands to be run while processing the input.
+
+@end table
+
+If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file}
+options are given on the command-line,
+then the first non-option argument on the command line is
+taken to be the @var{script} to be executed.
+
+@cindex Files to be processed as input
+If any command-line parameters remain after processing the above,
+these parameters are interpreted as the names of input files to
+be processed.
+@cindex Standard input, processing as input
+A file name of @samp{-} refers to the standard input stream.
+The standard input will be processed if no file names are specified.
+
+
+@node sed Programs
+@chapter @command{sed} Programs
+
+@cindex @command{sed} program structure
+@cindex Script structure
+A @command{sed} program consists of one or more @command{sed} commands,
+passed in by one or more of the
+@option{-e}, @option{-f}, @option{--expression}, and @option{--file}
+options, or the first non-option argument if zero of these
+options are used.
+This document will refer to ``the'' @command{sed} script;
+this is understood to mean the in-order catenation
+of all of the @var{script}s and @var{script-file}s passed in.
+
+Each @code{sed} command consists of an optional address or
+address range, followed by a one-character command name
+and any additional command-specific code.
+
+@menu
+* Execution Cycle:: How @command{sed} works
+* Addresses:: Selecting lines with @command{sed}
+* Regular Expressions:: Overview of regular expression syntax
+* Common Commands:: Often used commands
+* The "s" Command:: @command{sed}'s Swiss Army Knife
+* Other Commands:: Less frequently used commands
+* Programming Commands:: Commands for @command{sed} gurus
+* Extended Commands:: Commands specific to @value{SSED}
+* Escapes:: Specifying special characters
+@end menu
+
+
+@node Execution Cycle
+@section How @command{sed} Works
+
+@cindex Buffer spaces, pattern and hold
+@cindex Spaces, pattern and hold
+@cindex Pattern space, definition
+@cindex Hold space, definition
+@command{sed} maintains two data buffers: the active @emph{pattern} space,
+and the auxiliary @emph{hold} space. Both are initially empty.
+
+@command{sed} operates by performing the following cycle on each
+line of input: first, @command{sed} reads one line from the input
+stream, removes any trailing newline, and places it in the pattern space.
+Then commands are executed; each command can have an address associated
+with it: addresses are a kind of condition code, and a command is only
+executed if the condition is verified before the command is to be
+executed.
+
+When the end of the script is reached, unless the @option{-n} option
+is in use, the contents of pattern space are printed out to the output
+stream, adding back the trailing newline if it was removed.@footnote{Actually,
+ if @command{sed} prints a line without the terminating newline, it will
+ nevertheless print the missing newline as soon as more text is sent to
+ the same output stream, which gives the ``least expected surprise''
+ even though it does not make commands like @samp{sed -n p} exactly
+ identical to @command{cat}.} Then the next cycle starts for the next
+input line.
+
+Unless special commands (like @samp{D}) are used, the pattern space is
+deleted between two cycles. The hold space, on the other hand, keeps
+its data between cycles (see commands @samp{h}, @samp{H}, @samp{x},
+@samp{g}, @samp{G} to move data between both buffers).
+
+
+@node Addresses
+@section Selecting lines with @command{sed}
+@cindex Addresses, in @command{sed} scripts
+@cindex Line selection
+@cindex Selecting lines to process
+
+Addresses in a @command{sed} script can be in any of the following forms:
+@table @code
+@item @var{number}
+@cindex Address, numeric
+@cindex Line, selecting by number
+Specifying a line number will match only that line in the input.
+(Note that @command{sed} counts lines continuously across all input files
+unless @option{-i} or @option{-s} options are specified.)
+
+@item @var{first}~@var{step}
+@cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses
+This @acronym{GNU} extension matches every @var{step}th line
+starting with line @var{first}.
+In particular, lines will be selected when there exists
+a non-negative @var{n} such that the current line-number equals
+@var{first} + (@var{n} * @var{step}).
+Thus, to select the odd-numbered lines,
+one would use @code{1~2};
+to pick every third line starting with the second, @samp{2~3} would be used;
+to pick every fifth line starting with the tenth, use @samp{10~5};
+and @samp{50~0} is just an obscure way of saying @code{50}.
+
+@item $
+@cindex Address, last line
+@cindex Last line, selecting
+@cindex Line, selecting last
+This address matches the last line of the last file of input, or
+the last line of each file when the @option{-i} or @option{-s} options
+are specified.
+
+@item /@var{regexp}/
+@cindex Address, as a regular expression
+@cindex Line, selecting by regular expression match
+This will select any line which matches the regular expression @var{regexp}.
+If @var{regexp} itself includes any @code{/} characters,
+each must be escaped by a backslash (@code{\}).
+
+@cindex empty regular expression
+@cindex @value{SSEDEXT}, modifiers and the empty regular expression
+The empty regular expression @samp{//} repeats the last regular
+expression match (the same holds if the empty regular expression is
+passed to the @code{s} command). Note that modifiers to regular expressions
+are evaluated when the regular expression is compiled, thus it is invalid to
+specify them together with the empty regular expression.
+
+@item \%@var{regexp}%
+(The @code{%} may be replaced by any other single character.)
+
+@cindex Slash character, in regular expressions
+This also matches the regular expression @var{regexp},
+but allows one to use a different delimiter than @code{/}.
+This is particularly useful if the @var{regexp} itself contains
+a lot of slashes, since it avoids the tedious escaping of every @code{/}.
+If @var{regexp} itself includes any delimiter characters,
+each must be escaped by a backslash (@code{\}).
+
+@item /@var{regexp}/I
+@itemx \%@var{regexp}%I
+@cindex @acronym{GNU} extensions, @code{I} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, case-insensitive
+@end ifset
+The @code{I} modifier to regular-expression matching is a @acronym{GNU}
+extension which causes the @var{regexp} to be matched in
+a case-insensitive manner.
+
+@item /@var{regexp}/M
+@itemx \%@var{regexp}%M
+@cindex @value{SSEDEXT}, @code{M} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, multiline
+@end ifset
+The @code{M} modifier to regular-expression matching is a @value{SSED}
+extension which causes @code{^} and @code{$} to match respectively
+(in addition to the normal behavior) the empty string after a newline,
+and the empty string before a newline. There are special character
+sequences
+@ifset PERL
+(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'}
+in basic or extended regular expression modes)
+@end ifset
+@ifclear PERL
+(@code{\`} and @code{\'})
+@end ifclear
+which always match the beginning or the end of the buffer.
+@code{M} stands for @cite{multi-line}.
+
+@ifset PERL
+@item /@var{regexp}/S
+@itemx \%@var{regexp}%S
+@cindex @value{SSEDEXT}, @code{S} modifier
+@cindex Perl-style regular expressions, single line
+The @code{S} modifier to regular-expression matching is only valid
+in Perl mode and specifies that the dot character (@code{.}) will
+match the newline character too. @code{S} stands for @cite{single-line}.
+@end ifset
+
+@ifset PERL
+@item /@var{regexp}/X
+@itemx \%@var{regexp}%X
+@cindex @value{SSEDEXT}, @code{X} modifier
+@cindex Perl-style regular expressions, extended
+The @code{X} modifier to regular-expression matching is also
+valid in Perl mode only. If it is used, whitespace in the
+pattern (other than in a character class) and
+characters between a @kbd{#} outside a character class and the
+next newline character are ignored. An escaping backslash
+can be used to include a whitespace or @kbd{#} character as part
+of the pattern.
+@end ifset
+@end table
+
+If no addresses are given, then all lines are matched;
+if one address is given, then only lines matching that
+address are matched.
+
+@cindex Range of lines
+@cindex Several lines, selecting
+An address range can be specified by specifying two addresses
+separated by a comma (@code{,}). An address range matches lines
+starting from where the first address matches, and continues
+until the second address matches (inclusively).
+
+If the second address is a @var{regexp}, then checking for the
+ending match will start with the line @emph{following} the
+line which matched the first address: a range will always
+span at least two lines (except of course if the input stream
+ends).
+
+If the second address is a @var{number} less than (or equal to)
+the line matching the first address, then only the one line is
+matched.
+
+@cindex Special addressing forms
+@cindex Range with start address of zero
+@cindex Zero, as range start address
+@cindex @var{addr1},+N
+@cindex @var{addr1},~N
+@cindex @acronym{GNU} extensions, special two-address forms
+@cindex @acronym{GNU} extensions, @code{0} address
+@cindex @acronym{GNU} extensions, 0,@var{addr2} addressing
+@cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing
+@cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing
+@value{SSED} also supports some special two-address forms; all these
+are @acronym{GNU} extensions:
+@table @code
+@item 0,/@var{regexp}/
+A line number of @code{0} can be used in an address specification like
+@code{0,/@var{regexp}/} so that @command{sed} will try to match
+@var{regexp} in the first input line too. In other words,
+@code{0,/@var{regexp}/} is similar to @code{1,/@var{regexp}/},
+except that if @var{addr2} matches the very first line of input the
+@code{0,/@var{regexp}/} form will consider it to end the range, whereas
+the @code{1,/@var{regexp}/} form will match the beginning of its range and
+hence make the range span up to the @emph{second} occurrence of the
+regular expression.
+
+Note that this is the only place where the @code{0} address makes
+sense; there is no 0-th line and commands which are given the @code{0}
+address in any other way will give an error.
+
+@item @var{addr1},+@var{N}
+Matches @var{addr1} and the @var{N} lines following @var{addr1}.
+
+@item @var{addr1},~@var{N}
+Matches @var{addr1} and the lines following @var{addr1}
+until the next line whose input line number is a multiple of @var{N}.
+@end table
+
+@cindex Excluding lines
+@cindex Selecting non-matching lines
+Appending the @code{!} character to the end of an address
+specification negates the sense of the match.
+That is, if the @code{!} character follows an address range,
+then only lines which do @emph{not} match the address range
+will be selected.
+This also works for singleton addresses,
+and, perhaps perversely, for the null address.
+
+
+@node Regular Expressions
+@section Overview of Regular Expression Syntax
+
+To know how to use @command{sed}, people should understand regular
+expressions (@dfn{regexp} for short). A regular expression
+is a pattern that is matched against a
+subject string from left to right. Most characters are
+@dfn{ordinary}: they stand for
+themselves in a pattern, and match the corresponding characters
+in the subject. As a trivial example, the pattern
+
+@example
+ The quick brown fox
+@end example
+
+@noindent
+matches a portion of a subject string that is identical to
+itself. The power of regular expressions comes from the
+ability to include alternatives and repetitions in the pattern.
+These are encoded in the pattern by the use of @dfn{special characters},
+which do not stand for themselves but instead
+are interpreted in some special way. Here is a brief description
+of regular expression syntax as used in @command{sed}.
+
+@table @code
+@item @var{char}
+A single ordinary character matches itself.
+
+@item *
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Matches a sequence of zero or more instances of matches for the
+preceding regular expression, which must be an ordinary character, a
+special character preceded by @code{\}, a @code{.}, a grouped regexp
+(see below), or a bracket expression. As a @acronym{GNU} extension, a
+postfixed regular expression can also be followed by @code{*}; for
+example, @code{a**} is equivalent to @code{a*}. @acronym{POSIX}
+1003.1-2001 says that @code{*} stands for itself when it appears at
+the start of a regular expression or subexpression, but many
+non-@acronym{GNU} implementations do not support this and portable
+scripts should instead use @code{\*} in these contexts.
+
+@item \+
+@cindex @acronym{GNU} extensions, to basic regular expressions
+As @code{*}, but matches one or more. It is a @acronym{GNU} extension.
+
+@item \?
+@cindex @acronym{GNU} extensions, to basic regular expressions
+As @code{*}, but only matches zero or one. It is a @acronym{GNU} extension.
+
+@item \@{@var{i}\@}
+As @code{*}, but matches exactly @var{i} sequences (@var{i} is a
+decimal integer; for portability, keep it between 0 and 255
+inclusive).
+
+@item \@{@var{i},@var{j}\@}
+Matches between @var{i} and @var{j}, inclusive, sequences.
+
+@item \@{@var{i},\@}
+Matches more than or equal to @var{i} sequences.
+
+@item \(@var{regexp}\)
+Groups the inner @var{regexp} as a whole, this is used to:
+
+@itemize @bullet
+@item
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Apply postfix operators, like @code{\(abcd\)*}:
+this will search for zero or more whole sequences
+of @samp{abcd}, while @code{abcd*} would search
+for @samp{abc} followed by zero or more occurrences
+of @samp{d}. Note that support for @code{\(abcd\)*} is
+required by @acronym{POSIX} 1003.1-2001, but many non-@acronym{GNU}
+implementations do not support it and hence it is not universally
+portable.
+
+@item
+Use back references (see below).
+@end itemize
+
+@item .
+Matches any character, including newline.
+
+@item ^
+Matches the null string at beginning of line, i.e. what
+appears after the circumflex must appear at the
+beginning of line. @code{^#include} will match only
+lines where @samp{#include} is the first thing on line---if
+there are spaces before, for example, the match fails.
+@code{^} acts as a special character only at the beginning
+of the regular expression or subexpression (that is,
+after @code{\(} or @code{\|}). Portable scripts should avoid
+@code{^} at the beginning of a subexpression, though, as
+@acronym{POSIX} allows implementations that treat @code{^} as
+an ordinary character in that context.
+
+
+@item $
+It is the same as @code{^}, but refers to end of line.
+@code{$} also acts as a special character only at the end
+of the regular expression or subexpression (that is, before @code{\)}
+or @code{\|}), and its use at the end of a subexpression is not
+portable.
+
+
+@item [@var{list}]
+@itemx [^@var{list}]
+Matches any single character in @var{list}: for example,
+@code{[aeiou]} matches all vowels. A list may include
+sequences like @code{@var{char1}-@var{char2}}, which
+matches any character between (inclusive) @var{char1}
+and @var{char2}.
+
+A leading @code{^} reverses the meaning of @var{list}, so that
+it matches any single character @emph{not} in @var{list}. To include
+@code{]} in the list, make it the first character (after
+the @code{^} if needed), to include @code{-} in the list,
+make it the first or last; to include @code{^} put
+it after the first character.
+
+@cindex @code{POSIXLY_CORRECT} behavior, bracket expressions
+The characters @code{$}, @code{*}, @code{.}, @code{[}, and @code{\}
+are normally not special within @var{list}. For example, @code{[\*]}
+matches either @samp{\} or @samp{*}, because the @code{\} is not
+special here. However, strings like @code{[.ch.]}, @code{[=a=]}, and
+@code{[:space:]} are special within @var{list} and represent collating
+symbols, equivalence classes, and character classes, respectively, and
+@code{[} is therefore special within @var{list} when it is followed by
+@code{.}, @code{=}, or @code{:}. Also, when not in
+@env{POSIXLY_CORRECT} mode, special escapes like @code{\n} and
+@code{\t} are recognized within @var{list}. @xref{Escapes}.
+
+@item @var{regexp1}\|@var{regexp2}
+@cindex @acronym{GNU} extensions, to basic regular expressions
+Matches either @var{regexp1} or @var{regexp2}. Use
+parentheses to use complex alternative regular expressions.
+The matching process tries each alternative in turn, from
+left to right, and the first one that succeeds is used.
+It is a @acronym{GNU} extension.
+
+@item @var{regexp1}@var{regexp2}
+Matches the concatenation of @var{regexp1} and @var{regexp2}.
+Concatenation binds more tightly than @code{\|}, @code{^}, and
+@code{$}, but less tightly than the other regular expression
+operators.
+
+@item \@var{digit}
+Matches the @var{digit}-th @code{\(@dots{}\)} parenthesized
+subexpression in the regular expression. This is called a @dfn{back
+reference}. Subexpressions are implicitly numbered by counting
+occurrences of @code{\(} left-to-right.
+
+@item \n
+Matches the newline character.
+
+@item \@var{char}
+Matches @var{char}, where @var{char} is one of @code{$},
+@code{*}, @code{.}, @code{[}, @code{\}, or @code{^}.
+Note that the only C-like
+backslash sequences that you can portably assume to be
+interpreted are @code{\n} and @code{\\}; in particular
+@code{\t} is not portable, and matches a @samp{t} under most
+implementations of @command{sed}, rather than a tab character.
+
+@end table
+
+@cindex Greedy regular expression matching
+Note that the regular expression matcher is greedy, i.e., matches
+are attempted from left to right and, if two or more matches are
+possible starting at the same character, it selects the longest.
+
+@noindent
+Examples:
+@table @samp
+@item abcdef
+Matches @samp{abcdef}.
+
+@item a*b
+Matches zero or more @samp{a}s followed by a single
+@samp{b}. For example, @samp{b} or @samp{aaaaab}.
+
+@item a\?b
+Matches @samp{b} or @samp{ab}.
+
+@item a\+b\+
+Matches one or more @samp{a}s followed by one or more
+@samp{b}s: @samp{ab} is the shortest possible match, but
+other examples are @samp{aaaab} or @samp{abbbbb} or
+@samp{aaaaaabbbbbbb}.
+
+@item .*
+@itemx .\+
+These two both match all the characters in a string;
+however, the first matches every string (including the empty
+string), while the second matches only strings containing
+at least one character.
+
+@item ^main.*(.*)
+This matches a string starting with @samp{main},
+followed by an opening and closing
+parenthesis. The @samp{n}, @samp{(} and @samp{)} need not
+be adjacent.
+
+@item ^#
+This matches a string beginning with @samp{#}.
+
+@item \\$
+This matches a string ending with a single backslash. The
+regexp contains two backslashes for escaping.
+
+@item \$
+Instead, this matches a string consisting of a single dollar sign,
+because it is escaped.
+
+@item [a-zA-Z0-9]
+In the C locale, this matches any @acronym{ASCII} letters or digits.
+
+@item [^ @kbd{tab}]\+
+(Here @kbd{tab} stands for a single tab character.)
+This matches a string of one or more
+characters, none of which is a space or a tab.
+Usually this means a word.
+
+@item ^\(.*\)\n\1$
+This matches a string consisting of two equal substrings separated by
+a newline.
+
+@item .\@{9\@}A$
+This matches nine characters followed by an @samp{A}.
+
+@item ^.\@{15\@}A
+This matches the start of a string that contains 16 characters,
+the last of which is an @samp{A}.
+
+@end table
+
+
+
+@node Common Commands
+@section Often-Used Commands
+
+If you use @command{sed} at all, you will quite likely want to know
+these commands.
+
+@table @code
+@item #
+[No addresses allowed.]
+
+@findex # (comments)
+@cindex Comments, in scripts
+The @code{#} character begins a comment;
+the comment continues until the next newline.
+
+@cindex Portability, comments
+If you are concerned about portability, be aware that
+some implementations of @command{sed} (which are not @sc{posix}
+conformant) may only support a single one-line comment,
+and then only when the very first character of the script is a @code{#}.
+
+@findex -n, forcing from within a script
+@cindex Caveat --- #n on first line
+Warning: if the first two characters of the @command{sed} script
+are @code{#n}, then the @option{-n} (no-autoprint) option is forced.
+If you want to put a comment in the first line of your script
+and that comment begins with the letter @samp{n}
+and you do not want this behavior,
+then be sure to either use a capital @samp{N},
+or place at least one space before the @samp{n}.
+
+@item q [@var{exit-code}]
+This command only accepts a single address.
+
+@findex q (quit) command
+@cindex @value{SSEDEXT}, returning an exit code
+@cindex Quitting
+Exit @command{sed} without processing any more commands or input.
+Note that the current pattern space is printed if auto-print is
+not disabled with the @option{-n} option. The ability to return
+an exit code from the @command{sed} script is a @value{SSED} extension.
+
+@item d
+@findex d (delete) command
+@cindex Text, deleting
+Delete the pattern space;
+immediately start next cycle.
+
+@item p
+@findex p (print) command
+@cindex Text, printing
+Print out the pattern space (to the standard output).
+This command is usually only used in conjunction with the @option{-n}
+command-line option.
+
+@item n
+@findex n (next-line) command
+@cindex Next input line, replace pattern space with
+@cindex Read next input line
+If auto-print is not disabled, print the pattern space,
+then, regardless, replace the pattern space with the next line of input.
+If there is no more input then @command{sed} exits without processing
+any more commands.
+
+@item @{ @var{commands} @}
+@findex @{@} command grouping
+@cindex Grouping commands
+@cindex Command groups
+A group of commands may be enclosed between
+@code{@{} and @code{@}} characters.
+This is particularly useful when you want a group of commands
+to be triggered by a single address (or address-range) match.
+
+@end table
+
+@node The "s" Command
+@section The @code{s} Command
+
+The syntax of the @code{s} (as in substitute) command is
+@samp{s/@var{regexp}/@var{replacement}/@var{flags}}. The @code{/}
+characters may be uniformly replaced by any other single
+character within any given @code{s} command. The @code{/}
+character (or whatever other character is used in its stead)
+can appear in the @var{regexp} or @var{replacement}
+only if it is preceded by a @code{\} character.
+
+The @code{s} command is probably the most important in @command{sed}
+and has a lot of different options. Its basic concept is simple:
+the @code{s} command attempts to match the pattern
+space against the supplied @var{regexp}; if the match is
+successful, then that portion of the pattern
+space which was matched is replaced with @var{replacement}.
+
+@cindex Backreferences, in regular expressions
+@cindex Parenthesized substrings
+The @var{replacement} can contain @code{\@var{n}} (@var{n} being
+a number from 1 to 9, inclusive) references, which refer to
+the portion of the match which is contained between the @var{n}th
+@code{\(} and its matching @code{\)}.
+Also, the @var{replacement} can contain unescaped @code{&}
+characters which reference the whole matched portion
+of the pattern space.
+@cindex @value{SSEDEXT}, case modifiers in @code{s} commands
+Finally, as a @value{SSED} extension, you can include a
+special sequence made of a backslash and one of the letters
+@code{L}, @code{l}, @code{U}, @code{u}, or @code{E}.
+The meaning is as follows:
+
+@table @code
+@item \L
+Turn the replacement
+to lowercase until a @code{\U} or @code{\E} is found,
+
+@item \l
+Turn the
+next character to lowercase,
+
+@item \U
+Turn the replacement to uppercase
+until a @code{\L} or @code{\E} is found,
+
+@item \u
+Turn the next character
+to uppercase,
+
+@item \E
+Stop case conversion started by @code{\L} or @code{\U}.
+@end table
+
+To include a literal @code{\}, @code{&}, or newline in the final
+replacement, be sure to precede the desired @code{\}, @code{&},
+or newline in the @var{replacement} with a @code{\}.
+
+@findex s command, option flags
+@cindex Substitution of text, options
+The @code{s} command can be followed by zero or more of the
+following @var{flags}:
+
+@table @code
+@item g
+@cindex Global substitution
+@cindex Replacing all text matching regexp in a line
+Apply the replacement to @emph{all} matches to the @var{regexp},
+not just the first.
+
+@item @var{number}
+@cindex Replacing only @var{n}th match of regexp in a line
+Only replace the @var{number}th match of the @var{regexp}.
+
+@cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command
+@cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command
+Note: the @sc{posix} standard does not specify what should happen
+when you mix the @code{g} and @var{number} modifiers,
+and currently there is no widely agreed upon meaning
+across @command{sed} implementations.
+For @value{SSED}, the interaction is defined to be:
+ignore matches before the @var{number}th,
+and then match and replace all matches from
+the @var{number}th on.
+
+@item p
+@cindex Text, printing after substitution
+If the substitution was made, then print the new pattern space.
+
+Note: when both the @code{p} and @code{e} options are specified,
+the relative ordering of the two produces very different results.
+In general, @code{ep} (evaluate then print) is what you want,
+but operating the other way round can be useful for debugging.
+For this reason, the current version of @value{SSED} interprets
+specially the presence of @code{p} options both before and after
+@code{e}, printing the pattern space before and after evaluation,
+while in general flags for the @code{s} command show their
+effect just once. This behavior, although documented, might
+change in future versions.
+
+@item w @var{file-name}
+@cindex Text, writing to a file after substitution
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+@cindex @value{SSEDEXT}, @file{/dev/stderr} file
+If the substitution was made, then write out the result to the named file.
+As a @value{SSED} extension, two special values of @var{file-name} are
+supported: @file{/dev/stderr}, which writes the result to the standard
+error, and @file{/dev/stdout}, which writes to the standard
+output.@footnote{This is equivalent to @code{p} unless the @option{-i}
+option is being used.}
+
+@item e
+@cindex Evaluate Bourne-shell commands, after substitution
+@cindex Subprocesses
+@cindex @value{SSEDEXT}, evaluating Bourne-shell commands
+@cindex @value{SSEDEXT}, subprocesses
+This command allows one to pipe input from a shell command
+into pattern space. If a substitution was made, the command
+that is found in pattern space is executed and pattern space
+is replaced with its output. A trailing newline is suppressed;
+results are undefined if the command to be executed contains
+a @sc{nul} character. This is a @value{SSED} extension.
+
+@item I
+@itemx i
+@cindex @acronym{GNU} extensions, @code{I} modifier
+@cindex Case-insensitive matching
+@ifset PERL
+@cindex Perl-style regular expressions, case-insensitive
+@end ifset
+The @code{I} modifier to regular-expression matching is a @acronym{GNU}
+extension which makes @command{sed} match @var{regexp} in a
+case-insensitive manner.
+
+@item M
+@itemx m
+@cindex @value{SSEDEXT}, @code{M} modifier
+@ifset PERL
+@cindex Perl-style regular expressions, multiline
+@end ifset
+The @code{M} modifier to regular-expression matching is a @value{SSED}
+extension which causes @code{^} and @code{$} to match respectively
+(in addition to the normal behavior) the empty string after a newline,
+and the empty string before a newline. There are special character
+sequences
+@ifset PERL
+(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'}
+in basic or extended regular expression modes)
+@end ifset
+@ifclear PERL
+(@code{\`} and @code{\'})
+@end ifclear
+which always match the beginning or the end of the buffer.
+@code{M} stands for @cite{multi-line}.
+
+@ifset PERL
+@item S
+@itemx s
+@cindex @value{SSEDEXT}, @code{S} modifier
+@cindex Perl-style regular expressions, single line
+The @code{S} modifier to regular-expression matching is only valid
+in Perl mode and specifies that the dot character (@code{.}) will
+match the newline character too. @code{S} stands for @cite{single-line}.
+@end ifset
+
+@ifset PERL
+@item X
+@itemx x
+@cindex @value{SSEDEXT}, @code{X} modifier
+@cindex Perl-style regular expressions, extended
+The @code{X} modifier to regular-expression matching is also
+valid in Perl mode only. If it is used, whitespace in the
+pattern (other than in a character class) and
+characters between a @kbd{#} outside a character class and the
+next newline character are ignored. An escaping backslash
+can be used to include a whitespace or @kbd{#} character as part
+of the pattern.
+@end ifset
+@end table
+
+
+@node Other Commands
+@section Less Frequently-Used Commands
+
+Though perhaps less frequently used than those in the previous
+section, some very small yet useful @command{sed} scripts can be built with
+these commands.
+
+@table @code
+@item y/@var{source-chars}/@var{dest-chars}/
+(The @code{/} characters may be uniformly replaced by
+any other single character within any given @code{y} command.)
+
+@findex y (transliterate) command
+@cindex Transliteration
+Transliterate any characters in the pattern space which match
+any of the @var{source-chars} with the corresponding character
+in @var{dest-chars}.
+
+Instances of the @code{/} (or whatever other character is used in its stead),
+@code{\}, or newlines can appear in the @var{source-chars} or @var{dest-chars}
+lists, provided that each instance is escaped by a @code{\}.
+The @var{source-chars} and @var{dest-chars} lists @emph{must}
+contain the same number of characters (after de-escaping).
+
+@item a\
+@itemx @var{text}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex a (append text lines) command
+@cindex Appending text after a line
+@cindex Text, appending
+Queue the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output)
+to be output at the end of the current cycle,
+or when the next input line is read.
+
+Escape sequences in @var{text} are processed, so you should
+use @code{\\} in @var{text} to print a single backslash.
+
+As a @acronym{GNU} extension, if between the @code{a} and the newline there is
+other than a whitespace-@code{\} sequence, then the text of this line,
+starting at the first non-whitespace character after the @code{a},
+is taken as the first line of the @var{text} block.
+(This enables a simplification in scripting a one-line add.)
+This extension also works with the @code{i} and @code{c} commands.
+
+@item i\
+@itemx @var{text}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex i (insert text lines) command
+@cindex Inserting text before a line
+@cindex Text, insertion
+Immediately output the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output).
+
+@item c\
+@itemx @var{text}
+@findex c (change to text lines) command
+@cindex Replacing selected lines with other text
+Delete the lines matching the address or address-range,
+and output the lines of text which follow this command
+(each but the last ending with a @code{\},
+which are removed from the output)
+in place of the last line
+(or in place of each line, if no addresses were specified).
+A new cycle is started after this command is done,
+since the pattern space will have been deleted.
+
+@item =
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex = (print line number) command
+@cindex Printing line number
+@cindex Line number, printing
+Print out the current input line number (with a trailing newline).
+
+@item l @var{n}
+@findex l (list unambiguously) command
+@cindex List pattern space
+@cindex Printing text unambiguously
+@cindex Line length, setting
+@cindex @value{SSEDEXT}, setting line length
+Print the pattern space in an unambiguous form:
+non-printable characters (and the @code{\} character)
+are printed in C-style escaped form; long lines are split,
+with a trailing @code{\} character to indicate the split;
+the end of each line is marked with a @code{$}.
+
+@var{n} specifies the desired line-wrap length;
+a length of 0 (zero) means to never wrap long lines. If omitted,
+the default as specified on the command line is used. The @var{n}
+parameter is a @value{SSED} extension.
+
+@item r @var{filename}
+@cindex @value{SSEDEXT}, two addresses supported by most commands
+As a @acronym{GNU} extension, this command accepts two addresses.
+
+@findex r (read file) command
+@cindex Read text from a file
+@cindex @value{SSEDEXT}, @file{/dev/stdin} file
+Queue the contents of @var{filename} to be read and
+inserted into the output stream at the end of the current cycle,
+or when the next input line is read.
+Note that if @var{filename} cannot be read, it is treated as
+if it were an empty file, without any error indication.
+
+As a @value{SSED} extension, the special value @file{/dev/stdin}
+is supported for the file name, which reads the contents of the
+standard input.
+
+@item w @var{filename}
+@findex w (write file) command
+@cindex Write to a file
+@cindex @value{SSEDEXT}, @file{/dev/stdout} file
+@cindex @value{SSEDEXT}, @file{/dev/stderr} file
+Write the pattern space to @var{filename}.
+As a @value{SSED} extension, two special values of @var{filename} are
+supported: @file{/dev/stderr}, which writes the result to the standard
+error, and @file{/dev/stdout}, which writes to the standard
+output.@footnote{This is equivalent to @code{p} unless the @option{-i}
+option is being used.}
+
+The file will be created (or truncated) before the
+first input line is read; all @code{w} commands
+(including instances of the @code{w} flag on successful @code{s} commands)
+which refer to the same @var{filename} are output without
+closing and reopening the file.
+
+@item D
+@findex D (delete first line) command
+@cindex Delete first line from pattern space
+Delete text in the pattern space up to the first newline.
+If any text is left, restart cycle with the resultant
+pattern space (without reading a new line of input),
+otherwise start a normal new cycle.
+
+@item N
+@findex N (append Next line) command
+@cindex Next input line, append to pattern space
+@cindex Append next input line to pattern space
+Add a newline to the pattern space,
+then append the next line of input to the pattern space.
+If there is no more input then @command{sed} exits without processing
+any more commands.
+
+@item P
+@findex P (print first line) command
+@cindex Print first line from pattern space
+Print out the portion of the pattern space up to the first newline.
+
+@item h
+@findex h (hold) command
+@cindex Copy pattern space into hold space
+@cindex Replace hold space with copy of pattern space
+@cindex Hold space, copying pattern space into
+Replace the contents of the hold space with the contents of the pattern space.
+
+@item H
+@findex H (append Hold) command
+@cindex Append pattern space to hold space
+@cindex Hold space, appending from pattern space
+Append a newline to the contents of the hold space,
+and then append the contents of the pattern space to that of the hold space.
+
+@item g
+@findex g (get) command
+@cindex Copy hold space into pattern space
+@cindex Replace pattern space with copy of hold space
+@cindex Hold space, copy into pattern space
+Replace the contents of the pattern space with the contents of the hold space.
+
+@item G
+@findex G (appending Get) command
+@cindex Append hold space to pattern space
+@cindex Hold space, appending to pattern space
+Append a newline to the contents of the pattern space,
+and then append the contents of the hold space to that of the pattern space.
+
+@item x
+@findex x (eXchange) command
+@cindex Exchange hold space with pattern space
+@cindex Hold space, exchange with pattern space
+Exchange the contents of the hold and pattern spaces.
+
+@end table
+
+
+@node Programming Commands
+@section Commands for @command{sed} gurus
+
+In most cases, use of these commands indicates that you are
+probably better off programming in something like @command{awk}
+or Perl. But occasionally one is committed to sticking
+with @command{sed}, and these commands can enable one to write
+quite convoluted scripts.
+
+@cindex Flow of control in scripts
+@table @code
+@item : @var{label}
+[No addresses allowed.]
+
+@findex : (label) command
+@cindex Labels, in scripts
+Specify the location of @var{label} for branch commands.
+In all other respects, a no-op.
+
+@item b @var{label}
+@findex b (branch) command
+@cindex Branch to a label, unconditionally
+@cindex Goto, in scripts
+Unconditionally branch to @var{label}.
+The @var{label} may be omitted, in which case the next cycle is started.
+
+@item t @var{label}
+@findex t (test and branch if successful) command
+@cindex Branch to a label, if @code{s///} succeeded
+@cindex Conditional branch
+Branch to @var{label} only if there has been a successful @code{s}ubstitution
+since the last input line was read or conditional branch was taken.
+The @var{label} may be omitted, in which case the next cycle is started.
+
+@end table
+
+@node Extended Commands
+@section Commands Specific to @value{SSED}
+
+These commands are specific to @value{SSED}, so you
+must use them with care and only when you are sure that
+hindering portability is not evil. They allow you to check
+for @value{SSED} extensions or to do tasks that are required
+quite often, yet are unsupported by standard @command{sed}s.
+
+@table @code
+@item e [@var{command}]
+@findex e (evaluate) command
+@cindex Evaluate Bourne-shell commands
+@cindex Subprocesses
+@cindex @value{SSEDEXT}, evaluating Bourne-shell commands
+@cindex @value{SSEDEXT}, subprocesses
+This command allows one to pipe input from a shell command
+into pattern space. Without parameters, the @code{e} command
+executes the command that is found in pattern space and
+replaces the pattern space with the output; a trailing newline
+is suppressed.
+
+If a parameter is specified, instead, the @code{e} command
+interprets it as a command and sends its output to the output stream
+(like @code{r} does). The command can run across multiple
+lines, all but the last ending with a backslash.
+
+In both cases, the results are undefined if the command to be
+executed contains a @sc{nul} character.
+
+@item L @var{n}
+@findex L (fLow paragraphs) command
+@cindex Reformat pattern space
+@cindex Reformatting paragraphs
+@cindex @value{SSEDEXT}, reformatting paragraphs
+@cindex @value{SSEDEXT}, @code{L} command
+This @value{SSED} extension fills and joins lines in pattern space
+to produce output lines of (at most) @var{n} characters, like
+@code{fmt} does; if @var{n} is omitted, the default as specified
+on the command line is used. This command is considered a failed
+experiment and, unless there is enough demand (which seems unlikely),
+will be removed in future versions.
+
+@ignore
+Blank lines, spaces between words, and indentation are
+preserved in the output; successive input lines with different
+indentation are not joined; tabs are expanded to 8 columns.
+
+If the pattern space contains multiple lines, they are joined, but
+since the pattern space usually contains a single line, the behavior
+of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e.,
+it does not join short lines to form longer ones).
+
+@var{n} specifies the desired line-wrap length; if omitted,
+the default as specified on the command line is used.
+@end ignore
+
+@item Q [@var{exit-code}]
+This command only accepts a single address.
+
+@findex Q (silent Quit) command
+@cindex @value{SSEDEXT}, quitting silently
+@cindex @value{SSEDEXT}, returning an exit code
+@cindex Quitting
+This command is the same as @code{q}, but will not print the
+contents of pattern space. Like @code{q}, it provides the
+ability to return an exit code to the caller.
+
+This command can be useful because the only alternative ways
+to accomplish this apparently trivial function are to use
+the @option{-n} option (which can unnecessarily complicate
+your script) or resorting to the following snippet, which
+wastes time by reading the whole file without any visible effect:
+
+@example
+:eat
+$d @i{Quit silently on the last line}
+N @i{Read another line, silently}
+g @i{Overwrite pattern space each time to save memory}
+b eat
+@end example
+
+@item R @var{filename}
+@findex R (read line) command
+@cindex Read text from a file
+@cindex @value{SSEDEXT}, reading a file a line at a time
+@cindex @value{SSEDEXT}, @code{R} command
+@cindex @value{SSEDEXT}, @file{/dev/stdin} file
+Queue a line of @var{filename} to be read and
+inserted into the output stream at the end of the current cycle,
+or when the next input line is read.
+Note that if @var{filename} cannot be read, or if its end is
+reached, no line is appended, without any error indication.
+
+As with the @code{r} command, the special value @file{/dev/stdin}
+is supported for the file name, which reads a line from the
+standard input.
+
+@item T @var{label}
+@findex T (test and branch if failed) command
+@cindex @value{SSEDEXT}, branch if @code{s///} failed
+@cindex Branch to a label, if @code{s///} failed
+@cindex Conditional branch
+Branch to @var{label} only if there have been no successful
+@code{s}ubstitutions since the last input line was read or
+conditional branch was taken. The @var{label} may be omitted,
+in which case the next cycle is started.
+
+@item v @var{version}
+@findex v (version) command
+@cindex @value{SSEDEXT}, checking for their presence
+@cindex Requiring @value{SSED}
+This command does nothing, but makes @command{sed} fail if
+@value{SSED} extensions are not supported, simply because other
+versions of @command{sed} do not implement it. In addition, you
+can specify the version of @command{sed} that your script
+requires, such as @code{4.0.5}. The default is @code{4.0}
+because that is the first version that implemented this command.
+
+This command enables all @value{SSEDEXT} even if
+@env{POSIXLY_CORRECT} is set in the environment.
+
+@item W @var{filename}
+@findex W (write first line) command
+@cindex Write first line to a file
+@cindex @value{SSEDEXT}, writing first line to a file
+Write to the given filename the portion of the pattern space up to
+the first newline. Everything said under the @code{w} command about
+file handling holds here too.
+@end table
+
+@node Escapes
+@section @acronym{GNU} Extensions for Escapes in Regular Expressions
+
+@cindex @acronym{GNU} extensions, special escapes
+Until this chapter, we have only encountered escapes of the form
+@samp{\^}, which tell @command{sed} not to interpret the circumflex
+as a special character, but rather to take it literally. For
+example, @samp{\*} matches a single asterisk rather than zero
+or more backslashes.
+
+@cindex @code{POSIXLY_CORRECT} behavior, escapes
+This chapter introduces another kind of escape@footnote{All
+the escapes introduced here are @acronym{GNU}
+extensions, with the exception of @code{\n}. In basic regular
+expression mode, setting @code{POSIXLY_CORRECT} disables them inside
+bracket expressions.}---that
+is, escapes that are applied to a character or sequence of characters
+that ordinarily are taken literally, and that @command{sed} replaces
+with a special character. This provides a way
+of encoding non-printable characters in patterns in a visible manner.
+There is no restriction on the appearance of non-printing characters
+in a @command{sed} script but when a script is being prepared in the
+shell or by text editing, it is usually easier to use one of
+the following escape sequences than the binary character it
+represents.
+
+The list of these escapes is:
+
+@table @code
+@item \a
+Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7).
+
+@item \f
+Produces or matches a form feed (@sc{ascii} 12).
+
+@item \n
+Produces or matches a newline (@sc{ascii} 10).
+
+@item \r
+Produces or matches a carriage return (@sc{ascii} 13).
+
+@item \t
+Produces or matches a horizontal tab (@sc{ascii} 9).
+
+@item \v
+Produces or matches a so called ``vertical tab'' (@sc{ascii} 11).
+
+@item \c@var{x}
+Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is
+any character. The precise effect of @samp{\c@var{x}} is as follows:
+if @var{x} is a lower case letter, it is converted to upper case.
+Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes
+hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B.
+
+@item \d@var{xxx}
+Produces or matches a character whose decimal @sc{ascii} value is @var{xxx}.
+
+@item \o@var{xxx}
+@ifset PERL
+@item \@var{xxx}
+@end ifset
+Produces or matches a character whose octal @sc{ascii} value is @var{xxx}.
+@ifset PERL
+The syntax without the @code{o} is active in Perl mode, while the one
+with the @code{o} is active in the normal or extended @sc{posix} regular
+expression modes.
+@end ifset
+
+@item \x@var{xx}
+Produces or matches a character whose hexadecimal @sc{ascii} value is @var{xx}.
+@end table
+
+@samp{\b} (backspace) was omitted because of the conflict with
+the existing ``word boundary'' meaning.
+
+Other escapes match a particular character class and are valid only in
+regular expressions:
+
+@table @code
+@item \w
+Matches any ``word'' character. A ``word'' character is any
+letter or digit or the underscore character.
+
+@item \W
+Matches any ``non-word'' character.
+
+@item \b
+Matches a word boundary; that is, it matches if the character
+to the left is a ``word'' character and the character to the
+right is a ``non-word'' character, or vice-versa.
+
+@item \B
+Matches everywhere but on a word boundary; that is, it matches
+if the character to the left and the character to the right
+are either both ``word'' characters or both ``non-word''
+characters.
+
+@item \`
+Matches only at the start of pattern space. This is different
+from @code{^} in multi-line mode.
+
+@item \'
+Matches only at the end of pattern space. This is different
+from @code{$} in multi-line mode.
+
+@ifset PERL
+@item \G
+Match only at the start of pattern space or, when doing a global
+substitution using the @code{s///g} command and option, at
+the end-of-match position of the prior match. For example,
+@samp{s/\Ga/Z/g} will change an initial run of @code{a}s to
+a run of @code{Z}s.
+@end ifset
+@end table
+
+@node Examples
+@chapter Some Sample Scripts
+
+Here are some @command{sed} scripts to guide you in the art of mastering
+@command{sed}.
+
+@menu
+Some exotic examples:
+* Centering lines::
+* Increment a number::
+* Rename files to lower case::
+* Print bash environment::
+* Reverse chars of lines::
+
+Emulating standard utilities:
+* tac:: Reverse lines of files
+* cat -n:: Numbering lines
+* cat -b:: Numbering non-blank lines
+* wc -c:: Counting chars
+* wc -w:: Counting words
+* wc -l:: Counting lines
+* head:: Printing the first lines
+* tail:: Printing the last lines
+* uniq:: Make duplicate lines unique
+* uniq -d:: Print duplicated lines of input
+* uniq -u:: Remove all duplicated lines
+* cat -s:: Squeezing blank lines
+@end menu
+
+@node Centering lines
+@section Centering Lines
+
+This script centers all lines of a file on an 80-column width.
+To change that width, the number in @code{\@{@dots{}\@}} must be
+replaced, and the number of added spaces also must be changed.
+
+Note how the buffer commands are used to separate parts in
+the regular expressions to be matched---this is a common
+technique.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+@group
+# Put 80 spaces in the buffer
+1 @{
+ x
+ s/^$/ /
+ s/^.*$/&&&&&&&&/
+ x
+@}
+@end group
+
+@group
+# del leading and trailing spaces
+y/@kbd{tab}/ /
+s/^ *//
+s/ *$//
+@end group
+
+@group
+# add a newline and 80 spaces to end of line
+G
+@end group
+
+@group
+# keep first 81 chars (80 + a newline)
+s/^\(.\@{81\@}\).*$/\1/
+@end group
+
+@group
+# \2 matches half of the spaces, which are moved to the beginning
+s/^\(.*\)\n\(.*\)\2/\2\1/
+@end group
+@end example
+@c end---------------------------------------------
+
+@node Increment a number
+@section Increment a Number
+
+This script is one of a few that demonstrate how to do arithmetic
+in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg
+Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator!
+It is distributed together with sed.} but must be done manually.
+
+To increment one number you just add 1 to last digit, replacing
+it by the following digit. There is one exception: when the digit
+is a nine the previous digits must be also incremented until you
+don't have a nine.
+
+This solution by Bruno Haible is very clever and smart because
+it uses a single buffer; if you don't have this limitation, the
+algorithm used in @ref{cat -n, Numbering lines}, is faster.
+It works by replacing trailing nines with an underscore, then
+using multiple @code{s} commands to increment the last digit,
+and then again substituting underscores with zeros.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+/[^0-9]/ d
+
+@group
+# replace all trailing 9s by _ (any other character except digits could
+# be used)
+:d
+s/9\(_*\)$/_\1/
+td
+@end group
+
+@group
+# incr last digit only. The first line adds a most-significant
+# digit of 1 if we have to add a digit.
+#
+# The @code{tn} commands are not necessary, but make the thing
+# faster
+@end group
+
+@group
+s/^\(_*\)$/1\1/; tn
+s/8\(_*\)$/9\1/; tn
+s/7\(_*\)$/8\1/; tn
+s/6\(_*\)$/7\1/; tn
+s/5\(_*\)$/6\1/; tn
+s/4\(_*\)$/5\1/; tn
+s/3\(_*\)$/4\1/; tn
+s/2\(_*\)$/3\1/; tn
+s/1\(_*\)$/2\1/; tn
+s/0\(_*\)$/1\1/; tn
+@end group
+
+@group
+:n
+y/_/0/
+@end group
+@end example
+@c end---------------------------------------------
+
+@node Rename files to lower case
+@section Rename Files to Lower Case
+
+This is a pretty strange use of @command{sed}. We transform text, and
+transform it to be shell commands, then just feed them to shell.
+Don't worry, even worse hacks are done when using @command{sed}; I have
+seen a script converting the output of @command{date} into a @command{bc}
+program!
+
+The main body of this is the @command{sed} script, which remaps the name
+from lower to upper (or vice-versa) and even checks
+whether the remapped name is the same as the original name.
+Note how the script is parameterized using shell
+variables and proper quoting.
+
+@c start-------------------------------------------
+@example
+@group
+#! /bin/sh
+# rename files to lower/upper case...
+#
+# usage:
+# move-to-lower *
+# move-to-upper *
+# or
+# move-to-lower -R .
+# move-to-upper -R .
+#
+@end group
+
+@group
+help()
+@{
+ cat << eof
+Usage: $0 [-n] [-r] [-h] files...
+@end group
+
+@group
+-n do nothing, only see what would be done
+-R recursive (use find)
+-h this message
+files files to remap to lower case
+@end group
+
+@group
+Examples:
+ $0 -n * (see if everything is ok, then...)
+ $0 *
+@end group
+
+ $0 -R .
+
+@group
+eof
+@}
+@end group
+
+@group
+apply_cmd='sh'
+finder='echo "$@@" | tr " " "\n"'
+files_only=
+@end group
+
+@group
+while :
+do
+ case "$1" in
+ -n) apply_cmd='cat' ;;
+ -R) finder='find "$@@" -type f';;
+ -h) help ; exit 1 ;;
+ *) break ;;
+ esac
+ shift
+done
+@end group
+
+@group
+if [ -z "$1" ]; then
+ echo Usage: $0 [-h] [-n] [-r] files...
+ exit 1
+fi
+@end group
+
+@group
+LOWER='abcdefghijklmnopqrstuvwxyz'
+UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+@end group
+
+@group
+case `basename $0` in
+ *upper*) TO=$UPPER; FROM=$LOWER ;;
+ *) FROM=$UPPER; TO=$LOWER ;;
+esac
+@end group
+
+eval $finder | sed -n '
+
+@group
+# remove all trailing slashes
+s/\/*$//
+@end group
+
+@group
+# add ./ if there is no path, only a filename
+/\//! s/^/.\//
+@end group
+
+@group
+# save path+filename
+h
+@end group
+
+@group
+# remove path
+s/.*\///
+@end group
+
+@group
+# do conversion only on filename
+y/'$FROM'/'$TO'/
+@end group
+
+@group
+# now line contains original path+file, while
+# hold space contains the new filename
+x
+@end group
+
+@group
+# add converted file name to line, which now contains
+# path/file-name\nconverted-file-name
+G
+@end group
+
+@group
+# check if converted file name is equal to original file name,
+# if it is, do not print anything
+/^.*\/\(.*\)\n\1/b
+@end group
+
+@group
+# now, transform path/fromfile\n, into
+# mv path/fromfile path/tofile and print it
+s/^\(.*\/\)\(.*\)\n\(.*\)$/mv \1\2 \1\3/p
+@end group
+
+' | $apply_cmd
+@end example
+@c end---------------------------------------------
+
+@node Print bash environment
+@section Print @command{bash} Environment
+
+This script strips the definition of the shell functions
+from the output of the @command{set} Bourne-shell command.
+
+@c start-------------------------------------------
+@example
+#!/bin/sh
+
+@group
+set | sed -n '
+:x
+@end group
+
+@group
+@ifinfo
+# if no occurrence of "=()" print and load next line
+@end ifinfo
+@ifnotinfo
+# if no occurrence of @samp{=()} print and load next line
+@end ifnotinfo
+/=()/! @{ p; b; @}
+/ () $/! @{ p; b; @}
+@end group
+
+@group
+# possible start of functions section
+# save the line in case this is a var like FOO="() "
+h
+@end group
+
+@group
+# if the next line has a brace, we quit because
+# nothing comes after functions
+n
+/^@{/ q
+@end group
+
+@group
+# print the old line
+x; p
+@end group
+
+@group
+# work on the new line now
+x; bx
+'
+@end group
+@end example
+@c end---------------------------------------------
+
+@node Reverse chars of lines
+@section Reverse Characters of Lines
+
+This script can be used to reverse the position of characters
+in lines. The technique moves two characters at a time, hence
+it is faster than more intuitive implementations.
+
+Note the @code{tx} command before the definition of the label.
+This is often needed to reset the flag that is tested by
+the @code{t} command.
+
+Imaginative readers will find uses for this script. An example
+is reversing the output of @command{banner}.@footnote{This requires
+another script to pad the output of banner; for example
+
+@example
+#! /bin/sh
+
+banner -w $1 $2 $3 $4 |
+ sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' |
+ ~/sedscripts/reverseline.sed
+@end example
+}
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+/../! b
+
+@group
+# Reverse a line. Begin embedding the line between two newlines
+s/^.*$/\
+&\
+/
+@end group
+
+@group
+# Move first character at the end. The regexp matches until
+# there are zero or one characters between the markers
+tx
+:x
+s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/
+tx
+@end group
+
+@group
+# Remove the newline markers
+s/\n//g
+@end group
+@end example
+@c end---------------------------------------------
+
+@node tac
+@section Reverse Lines of Files
+
+This one begins a series of totally useless (yet interesting)
+scripts emulating various Unix commands. This, in particular,
+is a @command{tac} workalike.
+
+Note that on implementations other than @acronym{GNU} @command{sed}
+@ifset PERL
+and @value{SSED}
+@end ifset
+this script might easily overflow internal buffers.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+# reverse all lines of input, i.e. first line becomes last, ...
+
+@group
+# from the second line, the buffer (which contains all previous lines)
+# is *appended* to current line, so, the order will be reversed
+1! G
+@end group
+
+@group
+# on the last line we're done -- print everything
+$ p
+@end group
+
+@group
+# store everything on the buffer again
+h
+@end group
+@end example
+@c end---------------------------------------------
+
+@node cat -n
+@section Numbering Lines
+
+This script replaces @samp{cat -n}; in fact it formats its output
+exactly like @acronym{GNU} @command{cat} does.
+
+Of course this is completely useless, for two reasons: first,
+because somebody else did it in C, second, because the following
+Bourne-shell script could be used for the same purpose and would
+be much faster:
+
+@c start-------------------------------------------
+@example
+@group
+#! /bin/sh
+sed -e "=" $@@ | sed -e '
+ s/^/ /
+ N
+ s/^ *\(......\)\n/\1 /
+'
+@end group
+@end example
+@c end---------------------------------------------
+
+It uses @command{sed} to print the line number, then groups lines two
+by two using @code{N}. Of course, this script does not teach as much as
+the one presented below.
+
+The algorithm used for incrementing uses both buffers, so the line
+is printed as soon as possible and then discarded. The number
+is split so that changing digits go in a buffer and unchanged ones go
+in the other; the changed digits are modified in a single step
+(using a @code{y} command). The line number for the next line
+is then composed and stored in the hold space, to be used in the
+next iteration.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+# Prime the pump on the first line
+x
+/^$/ s/^.*$/1/
+@end group
+
+@group
+# Add the correct line number before the pattern
+G
+h
+@end group
+
+@group
+# Format it and print it
+s/^/ /
+s/^ *\(......\)\n/\1 /p
+@end group
+
+@group
+# Get the line number from hold space; add a zero
+# if we're going to add a digit on the next line
+g
+s/\n.*$//
+/^9*$/ s/^/0/
+@end group
+
+@group
+# separate changing/unchanged digits with an x
+s/.9*$/x&/
+@end group
+
+@group
+# keep changing digits in hold space
+h
+s/^.*x//
+y/0123456789/1234567890/
+x
+@end group
+
+@group
+# keep unchanged digits in pattern space
+s/x.*$//
+@end group
+
+@group
+# compose the new number, remove the newline implicitly added by G
+G
+s/\n//
+h
+@end group
+@end example
+@c end---------------------------------------------
+
+@node cat -b
+@section Numbering Non-blank Lines
+
+Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only
+have to select which lines are to be numbered and which are not.
+
+The part that is common to this script and the previous one is
+not commented to show how important it is to comment @command{sed}
+scripts properly...
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+/^$/ @{
+ p
+ b
+@}
+@end group
+
+@group
+# Same as cat -n from now
+x
+/^$/ s/^.*$/1/
+G
+h
+s/^/ /
+s/^ *\(......\)\n/\1 /p
+x
+s/\n.*$//
+/^9*$/ s/^/0/
+s/.9*$/x&/
+h
+s/^.*x//
+y/0123456789/1234567890/
+x
+s/x.*$//
+G
+s/\n//
+h
+@end group
+@end example
+@c end---------------------------------------------
+
+@node wc -c
+@section Counting Characters
+
+This script shows another way to do arithmetic with @command{sed}.
+In this case we have to add possibly large numbers, so implementing
+this by successive increments would not be feasible (and possibly
+even more complicated to contrive than this script).
+
+The approach is to map numbers to letters, kind of an abacus
+implemented with @command{sed}. @samp{a}s are units, @samp{b}s are
+tens and so on: we simply add the number of characters
+on the current line as units, and then propagate the carry
+to tens, hundreds, and so on.
+
+As usual, running totals are kept in hold space.
+
+On the last line, we convert the abacus form back to decimal.
+For the sake of variety, this is done with a loop rather than
+with some 80 @code{s} commands@footnote{Some implementations
+have a limit of 199 commands per script.}: first we
+convert units, removing @samp{a}s from the number; then we
+rotate letters so that tens become @samp{a}s, and so on
+until no more letters remain.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+# Add n+1 a's to hold space (+1 is for the newline)
+s/./a/g
+H
+x
+s/\n/a/
+@end group
+
+@group
+# Do the carry. The t's and b's are not necessary,
+# but they do speed up the thing
+t a
+: a; s/aaaaaaaaaa/b/g; t b; b done
+: b; s/bbbbbbbbbb/c/g; t c; b done
+: c; s/cccccccccc/d/g; t d; b done
+: d; s/dddddddddd/e/g; t e; b done
+: e; s/eeeeeeeeee/f/g; t f; b done
+: f; s/ffffffffff/g/g; t g; b done
+: g; s/gggggggggg/h/g; t h; b done
+: h; s/hhhhhhhhhh//g
+@end group
+
+@group
+: done
+$! @{
+ h
+ b
+@}
+@end group
+
+# On the last line, convert back to decimal
+
+@group
+: loop
+/a/! s/[b-h]*/&0/
+s/aaaaaaaaa/9/
+s/aaaaaaaa/8/
+s/aaaaaaa/7/
+s/aaaaaa/6/
+s/aaaaa/5/
+s/aaaa/4/
+s/aaa/3/
+s/aa/2/
+s/a/1/
+@end group
+
+@group
+: next
+y/bcdefgh/abcdefg/
+/[a-h]/ b loop
+p
+@end group
+@end example
+@c end---------------------------------------------
+
+@node wc -w
+@section Counting Words
+
+This script is almost the same as the previous one, once each
+of the words on the line is converted to a single @samp{a}
+(in the previous script each letter was changed to an @samp{a}).
+
+It is interesting that real @command{wc} programs have optimized
+loops for @samp{wc -c}, so they are much slower at counting
+words than at counting characters. This script's bottleneck,
+instead, is arithmetic, and hence the word-counting one
+is faster (it has to manage smaller numbers).
+
+Again, the common parts are not commented to show the importance
+of commenting @command{sed} scripts.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+# Convert words to a's
+s/[ @kbd{tab}][ @kbd{tab}]*/ /g
+s/^/ /
+s/ [^ ][^ ]*/a /g
+s/ //g
+@end group
+
+@group
+# Append them to hold space
+H
+x
+s/\n//
+@end group
+
+@group
+# From here on it is the same as in wc -c.
+/aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g
+/bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g
+/cccccccccc/! bx; s/cccccccccc/d/g
+/dddddddddd/! bx; s/dddddddddd/e/g
+/eeeeeeeeee/! bx; s/eeeeeeeeee/f/g
+/ffffffffff/! bx; s/ffffffffff/g/g
+/gggggggggg/! bx; s/gggggggggg/h/g
+s/hhhhhhhhhh//g
+:x
+$! @{ h; b; @}
+:y
+/a/! s/[b-h]*/&0/
+s/aaaaaaaaa/9/
+s/aaaaaaaa/8/
+s/aaaaaaa/7/
+s/aaaaaa/6/
+s/aaaaa/5/
+s/aaaa/4/
+s/aaa/3/
+s/aa/2/
+s/a/1/
+y/bcdefgh/abcdefg/
+/[a-h]/ by
+p
+@end group
+@end example
+@c end---------------------------------------------
+
+@node wc -l
+@section Counting Lines
+
+No strange things are done now, because @command{sed} gives us
+@samp{wc -l} functionality for free!!! Look:
+
+@c start-------------------------------------------
+@example
+@group
+#!/usr/bin/sed -nf
+$=
+@end group
+@end example
+@c end---------------------------------------------
+
+@node head
+@section Printing the First Lines
+
+This script is probably the simplest useful @command{sed} script.
+It displays the first 10 lines of input; the number of displayed
+lines is right before the @code{q} command.
+
+@c start-------------------------------------------
+@example
+@group
+#!/usr/bin/sed -f
+10q
+@end group
+@end example
+@c end---------------------------------------------
+
+@node tail
+@section Printing the Last Lines
+
+Printing the last @var{n} lines rather than the first is more complex
+but indeed possible. @var{n} is encoded in the second line, before
+the bang character.
+
+This script is similar to the @command{tac} script in that it keeps the
+final output in the hold space and prints it at the end:
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+1! @{; H; g; @}
+1,10 !s/[^\n]*\n//
+$p
+h
+@end group
+@end example
+@c end---------------------------------------------
+
+Mainly, the script keeps a window of 10 lines and slides it
+by adding a line and deleting the oldest (the substitution command
+on the second line works like a @code{D} command but does not
+restart the loop).
+
+The ``sliding window'' technique is a very powerful way to write
+efficient and complex @command{sed} scripts, because commands like
+@code{P} would require a lot of work if implemented manually.
+
+To introduce the technique, which is fully demonstrated in the
+rest of this chapter and is based on the @code{N}, @code{P}
+and @code{D} commands, here is an implementation of @command{tail}
+using a simple ``sliding window.''
+
+This looks complicated but in fact the working is the same as
+the last script: after we have kicked in the appropriate number
+of lines, however, we stop using the hold space to keep inter-line
+state, and instead use @code{N} and @code{D} to slide pattern
+space by one line:
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+@group
+1h
+2,10 @{; H; g; @}
+$q
+1,9d
+N
+D
+@end group
+@end example
+@c end---------------------------------------------
+
+
+@node uniq
+@section Make Duplicate Lines Unique
+
+This is an example of the art of using the @code{N}, @code{P}
+and @code{D} commands, probably the most difficult to master.
+
+@c start-------------------------------------------
+@example
+@group
+#!/usr/bin/sed -f
+h
+@end group
+
+@group
+:b
+# On the last line, print and exit
+$b
+N
+/^\(.*\)\n\1$/ @{
+ # The two lines are identical. Undo the effect of
+ # the N command.
+ g
+ bb
+@}
+@end group
+
+@group
+# If the @code{N} command had added the last line, print and exit
+$b
+@end group
+
+@group
+# The lines are different; print the first and go
+# back working on the second.
+P
+D
+@end group
+@end example
+@c end---------------------------------------------
+
+As you can see, we maintain a 2-line window using @code{P} and @code{D}.
+This technique is often used in advanced @command{sed} scripts.
+
+@node uniq -d
+@section Print Duplicated Lines of Input
+
+This script prints only duplicated lines, like @samp{uniq -d}.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+$b
+N
+/^\(.*\)\n\1$/ @{
+ # Print the first of the duplicated lines
+ s/.*\n//
+ p
+@end group
+
+@group
+ # Loop until we get a different line
+ :b
+ $b
+ N
+ /^\(.*\)\n\1$/ @{
+ s/.*\n//
+ bb
+ @}
+@}
+@end group
+
+@group
+# The last line cannot be followed by duplicates
+$b
+@end group
+
+@group
+# Found a different one. Leave it alone in the pattern space
+# and go back to the top, hunting its duplicates
+D
+@end group
+@end example
+@c end---------------------------------------------
+
+@node uniq -u
+@section Remove All Duplicated Lines
+
+This script prints only unique lines, like @samp{uniq -u}.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+@group
+# Search for a duplicate line --- until that, print what you find.
+$b
+N
+/^\(.*\)\n\1$/ ! @{
+ P
+ D
+@}
+@end group
+
+@group
+:c
+# Got two equal lines in pattern space. At the
+# end of the file we simply exit
+$d
+@end group
+
+@group
+# Else, we keep reading lines with @code{N} until we
+# find a different one
+s/.*\n//
+N
+/^\(.*\)\n\1$/ @{
+ bc
+@}
+@end group
+
+@group
+# Remove the last instance of the duplicate line
+# and go back to the top
+D
+@end group
+@end example
+@c end---------------------------------------------
+
+@node cat -s
+@section Squeezing Blank Lines
+
+As a final example, here are three scripts, of increasing complexity
+and speed, that implement the same function as @samp{cat -s}, that is
+squeezing blank lines.
+
+The first leaves a blank line at the beginning and end if there are
+some already.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+@group
+# on empty lines, join with next
+# Note there is a star in the regexp
+:x
+/^\n*$/ @{
+N
+bx
+@}
+@end group
+
+@group
+# now, squeeze all '\n', this can be also done by:
+# s/^\(\n\)*/\1/
+s/\n*/\
+/
+@end group
+@end example
+@c end---------------------------------------------
+
+This one is a bit more complex and removes all empty lines
+at the beginning. It does leave a single blank line at end
+if one was there.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -f
+
+@group
+# delete all leading empty lines
+1,/^./@{
+/./!d
+@}
+@end group
+
+@group
+# on an empty line we remove it and all the following
+# empty lines, but one
+:x
+/./!@{
+N
+s/^\n$//
+tx
+@}
+@end group
+@end example
+@c end---------------------------------------------
+
+This removes leading and trailing blank lines. It is also the
+fastest. Note that loops are completely done with @code{n} and
+@code{b}, without exploiting the fact that @command{sed} cycles back
+to the top of the script automatically at the end of a line.
+
+@c start-------------------------------------------
+@example
+#!/usr/bin/sed -nf
+
+@group
+# delete all (leading) blanks
+/./!d
+@end group
+
+@group
+# get here: so there is a non empty
+:x
+# print it
+p
+# get next
+n
+# got chars? print it again, etc...
+/./bx
+@end group
+
+@group
+# no, don't have chars: got an empty line
+:z
+# get next, if last line we finish here so no trailing
+# empty lines are written
+n
+# also empty? then ignore it, and get next... this will
+# remove ALL empty lines
+/./!bz
+@end group
+
+@group
+# all empty lines were deleted/ignored, but we have a non empty. As
+# what we want to do is to squeeze, insert a blank line artificially
+i\
+@end group
+
+bx
+@end example
+@c end---------------------------------------------
+
+@node Limitations
+@chapter @value{SSED}'s Limitations and Non-limitations
+
+@cindex @acronym{GNU} extensions, unlimited line length
+@cindex Portability, line length limitations
+For those who want to write portable @command{sed} scripts,
+be aware that some implementations have been known to
+limit line lengths (for the pattern and hold spaces)
+to be no more than 4000 bytes.
+The @sc{posix} standard specifies that conforming @command{sed}
+implementations shall support at least 8192 byte line lengths.
+@value{SSED} has no built-in limit on line length;
+as long as it can @code{malloc()} more (virtual) memory,
+you can feed or construct lines as long as you like.
+
+However, recursion is used to handle subpatterns and indefinite
+repetition. This means that the available stack space may limit
+the size of the buffer that can be processed by certain patterns.
+
+@ifset PERL
+There are some size limitations in the regular expression
+matcher but it is hoped that they will never in practice
+be relevant. The maximum length of a compiled pattern
+is 65539 (sic) bytes. All values in repeating quantifiers
+must be less than 65536. The maximum nesting depth of
+all parenthesized subpatterns, including capturing and
+non-capturing subpatterns@footnote{The
+distinction is meaningful when referring to Perl-style
+regular expressions.}, assertions, and other types of
+subpattern, is 200.
+
+Also, @value{SSED} recognizes the @sc{posix} syntax
+@code{[.@var{ch}.]} and @code{[=@var{ch}=]}
+where @var{ch} is a ``collating element'', but these
+are not supported, and an error is given if they are
+encountered.
+
+Here are a few distinctions between the real Perl-style
+regular expressions and those that @option{-R} recognizes.
+
+@enumerate
+@item
+Lookahead assertions do not allow repeat quantifiers after them.
+Perl permits them, but they do not mean what you
+might think. For example, @samp{(?!a)@{3@}} does not assert that the
+next three characters are not @samp{a}. It just asserts three times that the
+next character is not @samp{a} --- a waste of time and nothing else.
+
+@item
+Capturing subpatterns that occur inside negative lookahead
+assertions are counted, but their entries are counted
+as empty in the second half of an @code{s} command.
+Perl sets its numerical variables from any such patterns
+that are matched before the assertion fails to match
+something (thereby succeeding), but only if the negative
+lookahead assertion contains just one branch.
+
+@item
+The following Perl escape sequences are not supported:
+@samp{\l}, @samp{\u}, @samp{\L}, @samp{\U}, @samp{\E},
+@samp{\Q}. In fact these are implemented by Perl's general
+string-handling and are not part of its pattern matching engine.
+
+@item
+The Perl @samp{\G} assertion is not supported as it is not
+relevant to single pattern matches.
+
+@item
+Fairly obviously, @value{SSED} does not support the @samp{(?@{code@})}
+and @samp{(?p@{code@})} constructions. However, there is some experimental
+support for recursive patterns using the non-Perl item @samp{(?R)}.
+
+@item
+There are at the time of writing some oddities in Perl
+5.005_02 concerned with the settings of captured strings
+when part of a pattern is repeated. For example, matching
+@samp{aba} against the pattern @samp{/^(a(b)?)+$/} sets
+@samp{$2}@footnote{@samp{$2} would be @samp{\2} in @value{SSED}.}
+to the value @samp{b}, but matching @samp{aabbaa}
+against @samp{/^(aa(bb)?)+$/} leaves @samp{$2}
+unset. However, if the pattern is changed to
+@samp{/^(aa(b(b))?)+$/} then @samp{$2} (and @samp{$3}) are set.
+In Perl 5.004 @samp{$2} is set in both cases, and that is also
+true of @value{SSED}.
+
+@item
+Another as yet unresolved discrepancy is that in Perl
+5.005_02 the pattern @samp{/^(a)?(?(1)a|b)+$/} matches
+the string @samp{a}, whereas in @value{SSED} it does not.
+However, in both Perl and @value{SSED} @samp{/^(a)?a/} matched
+against @samp{a} leaves @samp{$1} unset.
+@end enumerate
+@end ifset
+
+@node Other Resources
+@chapter Other Resources for Learning About @command{sed}
+
+@cindex Additional reading about @command{sed}
+In addition to several books that have been written about @command{sed}
+(either specifically or as chapters in books which discuss
+shell programming), one can find out more about @command{sed}
+(including suggestions of a few books) from the FAQ
+for the @code{sed-users} mailing list, available from any of:
+@display
+ @uref{http://www.student.northpark.edu/pemente/sed/sedfaq.html}
+ @uref{http://sed.sf.net/grabbag/tutorials/sedfaq.html}
+@end display
+
+Also of interest are
+@uref{http://www.student.northpark.edu/pemente/sed/index.htm}
+and @uref{http://sed.sf.net/grabbag},
+which include @command{sed} tutorials and other @command{sed}-related goodies.
+
+The @code{sed-users} mailing list itself is maintained by Sven Guckes.
+To subscribe, visit @uref{http://groups.yahoo.com} and search
+for the @code{sed-users} mailing list.
+
+@node Reporting Bugs
+@chapter Reporting Bugs
+
+@cindex Bugs, reporting
+Email bug reports to @email{bonzini@@gnu.org}.
+Be sure to include the word ``sed'' somewhere in the @code{Subject:} field.
+Also, please include the output of @samp{sed --version} in the body
+of your report if at all possible.
+
+Please do not send a bug report like this:
+
+@example
+@i{while building frobme-1.3.4}
+$ configure
+@error{} sed: file sedscr line 1: Unknown option to 's'
+@end example
+
+If @value{SSED} doesn't configure your favorite package, take a
+few extra minutes to identify the specific problem and make a stand-alone
+test case. Unlike other programs such as C compilers, making such test
+cases for @command{sed} is quite simple.
+
+A stand-alone test case includes all the data necessary to perform the
+test, and the specific invocation of @command{sed} that causes the problem.
+The smaller a stand-alone test case is, the better. A test case should
+not involve something as far removed from @command{sed} as ``try to configure
+frobme-1.3.4''. Yes, that is in principle enough information to look
+for the bug, but that is not a very practical prospect.
+
+Here are a few commonly reported bugs that are not bugs.
+
+@table @asis
+@item @code{N} command on the last line
+@cindex Portability, @code{N} command on the last line
+@cindex Non-bugs, @code{N} command on the last line
+
+Most versions of @command{sed} exit without printing anything when
+the @command{N} command is issued on the last line of a file.
+@value{SSED} prints pattern space before exiting unless of course
+the @option{-n} command switch has been specified. This choice is
+by design.
+
+For example, the behavior of
+@example
+sed N foo bar
+@end example
+@noindent
+would depend on whether foo has an even or an odd number of
+lines@footnote{which is the actual ``bug'' that prompted the
+change in behavior}. Or, when writing a script to read the
+next few lines following a pattern match, traditional
+implementations of @code{sed} would force you to write
+something like
+@example
+/foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @}
+@end example
+@noindent
+instead of just
+@example
+/foo/@{ N;N;N;N;N;N;N;N;N; @}
+@end example
+
+@cindex @code{POSIXLY_CORRECT} behavior, @code{N} command
+In any case, the simplest workaround is to use @code{$d;N} in
+scripts that rely on the traditional behavior, or to set
+the @code{POSIXLY_CORRECT} variable to a non-empty value.
+
+@item Regex syntax clashes (problems with backslashes)
+@cindex @acronym{GNU} extensions, to basic regular expressions
+@cindex Non-bugs, regex syntax clashes
+@command{sed} uses the @sc{posix} basic regular expression syntax. According to
+the standard, the meaning of some escape sequences is undefined in
+this syntax; notable in the case of @command{sed} are @code{\|},
+@code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<},
+@code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}.
+
+As in all @acronym{GNU} programs that use @sc{posix} basic regular
+expressions, @command{sed} interprets these escape sequences as special
+characters. So, @code{x\+} matches one or more occurrences of @samp{x}.
+@code{abc\|def} matches either @samp{abc} or @samp{def}.
+
+This syntax may cause problems when running scripts written for other
+@command{sed}s. Some @command{sed} programs have been written with the
+assumption that @code{\|} and @code{\+} match the literal characters
+@code{|} and @code{+}. Such scripts must be modified by removing the
+spurious backslashes if they are to be used with modern implementations
+of @command{sed}, like
+@ifset PERL
+@value{SSED} or
+@end ifset
+@acronym{GNU} @command{sed}.
+
+On the other hand, some scripts use @code{s|abc\|def||g} to remove occurrences
+of @emph{either} @code{abc} or @code{def}. While this worked until
+@command{sed} 4.0.x, newer versions interpret this as removing the
+string @code{abc|def}. This is again undefined behavior according to
+@acronym{POSIX}, and this interpretation is arguably more robust: older
+@command{sed}s, for example, required that the regex matcher parsed
+@code{\/} as @code{/} in the common case of escaping a slash, which is
+again undefined behavior; the new behavior avoids this, and this is good
+because the regex matcher is only partially under our control.
+
+@cindex @acronym{GNU} extensions, special escapes
+In addition, this version of @command{sed} supports several escape characters
+(some of which are multi-character) to insert non-printable characters
+in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r},
+@code{\t}, @code{\v}, @code{\x}). These can cause similar problems
+with scripts written for other @command{sed}s.
+
+@item @option{-i} clobbers read-only files
+@cindex In-place editing
+@cindex @value{SSEDEXT}, in-place editing
+@cindex Non-bugs, in-place editing
+
+In short, @samp{sed -i} will let you delete the contents of
+a read-only file, and in general the @option{-i} option
+(@pxref{Invoking sed, , Invocation}) lets you clobber
+protected files. This is not a bug, but rather a consequence
+of how the Unix filesystem works.
+
+The permissions on a file say what can happen to the data
+in that file, while the permissions on a directory say what can
+happen to the list of files in that directory. @samp{sed -i}
+will not ever open for writing a file that is already on disk.
+Rather, it will work on a temporary file that is finally renamed
+to the original name: if you rename or delete files, you're actually
+modifying the contents of the directory, so the operation depends on
+the permissions of the directory, not of the file. For this same
+reason, @command{sed} does not let you use @option{-i} on a writeable file
+in a read-only directory (but unbelievably nobody reports that as a
+bug@dots{}).
+
+@item @code{0a} does not work (gives an error)
+There is no line 0. 0 is a special address that is only used to treat
+addresses like @samp{0,/@var{RE}/} as active when the script starts: if
+you write @samp{1,/abc/d} and the first line includes the word @samp{abc},
+then that match would be ignored because address ranges must span at least
+two lines (barring the end of the file); but what you probably wanted is
+to delete every line up to the first one including @samp{abc}, and this
+is obtained with @samp{0,/abc/d}.
+@end table
+
+@node Extended regexps
+@appendix Extended regular expressions
+@cindex Extended regular expressions, syntax
+
+The only difference between basic and extended regular expressions is in
+the behavior of a few characters: @samp{?}, @samp{+}, parentheses,
+and braces (@samp{@{@}}). While basic regular expressions require
+these to be escaped if you want them to behave as special characters,
+when using extended regular expressions you must escape them if
+you want them @emph{to match a literal character}.
+
+@noindent
+Examples:
+@table @code
+@item abc?
+becomes @samp{abc\?} when using extended regular expressions. It matches
+the literal string @samp{abc?}.
+
+@item c\+
+becomes @samp{c+} when using extended regular expressions. It matches
+one or more @samp{c}s.
+
+@item a\@{3,\@}
+becomes @samp{a@{3,@}} when using extended regular expressions. It matches
+three or more @samp{a}s.
+
+@item \(abc\)\@{2,3\@}
+becomes @samp{(abc)@{2,3@}} when using extended regular expressions. It
+matches either @samp{abcabc} or @samp{abcabcabc}.
+
+@item \(abc*\)\1
+becomes @samp{(abc*)\1} when using extended regular expressions.
+Backreferences must still be escaped when using extended regular
+expressions.
+@end table
+
+@ifset PERL
+@node Perl regexps
+@appendix Perl-style regular expressions
+@cindex Perl-style regular expressions, syntax
+
+@emph{This part is taken from the @file{pcre.txt} file distributed together
+with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.}
+
+Perl introduced several extensions to regular expressions, some
+of them incompatible with the syntax of regular expressions
+accepted by Emacs and other @acronym{GNU} tools (whose matcher was
+based on the Emacs matcher). @value{SSED} implements
+both kinds of extensions.
+
+@iftex
+Summarizing, we have:
+
+@itemize @bullet
+@item
+A backslash can introduce several special sequences
+
+@item
+The circumflex, dollar sign, and period characters behave specially
+with regard to new lines
+
+@item
+Strange uses of square brackets are parsed differently
+
+@item
+You can toggle modifiers in the middle of a regular expression
+
+@item
+You can specify that a subpattern does not count when numbering backreferences
+
+@item
+@cindex Greedy regular expression matching
+You can specify greedy or non-greedy matching
+
+@item
+You can have more than ten back references
+
+@item
+You can do complex look aheads and look behinds (in the spirit of
+@code{\b}, but with subpatterns).
+
+@item
+You can often improve performance by preventing @command{sed} from
+wasting time with backtracking
+
+@item
+You can have if/then/else branches
+
+@item
+You can do recursive matches, for example to look for unbalanced parentheses
+
+@item
+You can have comments and non-significant whitespace, because things can
+get complex...
+@end itemize
+
+Most of these extensions are introduced by the special @code{(?}
+sequence, which gives special meanings to parenthesized groups.
+@end iftex
+@menu
+Other extensions can be roughly subdivided in two categories.
+On one hand Perl introduces several more escaped sequences
+(that is, sequences introduced by a backslash). On the other
+hand, it specifies that if a question mark follows an open
+parenthesis it should give a special meaning to the parenthesized
+group.
+
+* Backslash:: Introduces special sequences
+* Circumflex/dollar sign/period:: Behave specially with regard to new lines
+* Square brackets:: Are a bit different in strange cases
+* Options setting:: Toggle modifiers in the middle of a regexp
+* Non-capturing subpatterns:: Are not counted when backreferencing
+* Repetition:: Allows for non-greedy matching
+* Backreferences:: Allows for more than 10 back references
+* Assertions:: Allows for complex look ahead matches
+* Non-backtracking subpatterns:: Often gives more performance
+* Conditional subpatterns:: Allows if/then/else branches
+* Recursive patterns:: For example to match parentheses
+* Comments:: Because things can get complex...
+@end menu
+
+@node Backslash
+@appendixsec Backslash
+@cindex Perl-style regular expressions, escaped sequences
+
+There are a few differences in the handling of backslashed
+sequences in Perl mode.
+
+First of all, there are no @code{\o} and @code{\d} sequences.
+@sc{ascii} values for characters can be specified in octal
+with a @code{\@var{xxx}} sequence, where @var{xxx} is a
+sequence of up to three octal digits. If the first digit
+is a zero, the treatment of the sequence is straightforward;
+just note that if the character that follows the escaped digit
+is itself an octal digit, you have to supply three octal digits
+for @var{xxx}. For example @code{\07} is a @sc{bel} character
+rather than a @sc{nul} and a literal @code{7} (this sequence is
+instead represented by @code{\0007}).
+
+@cindex Perl-style regular expressions, backreferences
+The handling of a backslash followed by a digit other than 0
+is complicated. Outside a character class, @command{sed} reads it
+and any following digits as a decimal number. If the number
+is less than 10, or if there have been at least that many
+previous capturing left parentheses in the expression, the
+entire sequence is taken as a back reference. A description
+of how this works is given later, following the discussion
+of parenthesized subpatterns.
+
+Inside a character class, or if the decimal number is
+greater than 9 and there have not been that many capturing
+subpatterns, @command{sed} re-reads up to three octal digits following
+the backslash, and generates a single byte from the
+least significant 8 bits of the value. Any subsequent digits
+stand for themselves. For example:
+
+@example
+ \040 @i{is another way of writing a space}
+ \40 @i{is the same, provided there are fewer than 40}
+ @i{previous capturing subpatterns}
+ \7 @i{is always a back reference}
+ \011 @i{is always a tab}
+ \11 @i{might be a back reference, or another way of}
+ @i{writing a tab}
+ \0113 @i{is a tab followed by the character @samp{3}}
+ \113 @i{is the character with octal code 113 (since there}
+ @i{can be no more than 99 back references)}
+ \377 @i{is a byte consisting entirely of 1 bits (@sc{ascii} 255)}
+ \81 @i{is either a back reference, or a binary zero}
+ @i{followed by the two characters @samp{81}}
+@end example
+
+Note that octal values of 100 or greater must not be introduced
+by a leading zero, because no more than three octal
+digits are ever read.
+
+All the sequences that define a single byte value can be
+used both inside and outside character classes. In addition,
+inside a character class, the sequence @code{\b} is interpreted
+as the backspace character (hex 08). Outside a character
+class it has a different meaning (see below).
+
+In addition, there are four additional escapes specifying
+generic character classes (like @code{\w} and @code{\W} do):
+
+@cindex Perl-style regular expressions, character classes
+@table @samp
+@item \d
+Matches any decimal digit
+
+@item \D
+Matches any character that is not a decimal digit
+@end table
+
+In Perl mode, these character type sequences can appear both inside and
+outside character classes. Instead, in @sc{posix} mode these sequences
+(as well as @code{\w} and @code{\W}) are treated as two literal characters
+(a backslash and a letter) inside square brackets.
+
+Escaped sequences specifying assertions are also different in
+Perl mode. An assertion specifies a condition that has to be met
+at a particular point in a match, without consuming any
+characters from the subject string. The use of subpatterns
+for more complicated assertions is described below. The
+backslashed assertions are
+
+@cindex Perl-style regular expressions, assertions
+@table @samp
+@item \b
+Asserts that the point is at a word boundary.
+A word boundary is a position in the subject string where
+the current character and the previous character do not both
+match @code{\w} or @code{\W} (i.e. one matches @code{\w} and
+the other matches @code{\W}), or the start or end of the string
+if the first or last character matches @code{\w}, respectively.
+
+@item \B
+Asserts that the point is not at a word boundary.
+
+@item \A
+Asserts the matcher is at the start of pattern space (independent
+of multiline mode).
+
+@item \Z
+Asserts the matcher is at the end of pattern space,
+or at a newline before the end of pattern space (independent of
+multiline mode)
+
+@item \z
+Asserts the matcher is at the end of pattern space (independent
+of multiline mode)
+@end table
+
+These assertions may not appear in character classes (but
+note that @code{\b} has a different meaning, namely the
+backspace character, inside a character class).
+Note that Perl mode does not directly support assertions
+for the beginning and the end of word; the @acronym{GNU} extensions
+@code{\<} and @code{\>} achieve this purpose in @sc{posix} mode
+instead.
+
+The @code{\A}, @code{\Z}, and @code{\z} assertions differ
+from the traditional circumflex and dollar sign (described below)
+in that they only ever match at the very start and end of the
+subject string, whatever options are set; in particular @code{\A}
+and @code{\z} are the same as the @acronym{GNU} extensions
+@code{\`} and @code{\'} that are active in @sc{posix} mode.
+
+@node Circumflex/dollar sign/period
+@appendixsec Circumflex, dollar sign, period
+@cindex Perl-style regular expressions, newlines
+
+Outside a character class, in the default matching mode, the
+circumflex character is an assertion which is true only if
+the current matching point is at the start of the subject
+string. Inside a character class, the circumflex has an entirely
+different meaning (see below).
+
+The circumflex need not be the first character of the pattern if
+a number of alternatives are involved, but it should be the
+first thing in each alternative in which it appears if the
+pattern is ever to match that branch. If all possible alternatives
+start with a circumflex, that is, if the pattern is
+constrained to match only at the start of the subject, it is
+said to be an @dfn{anchored} pattern. (There are also other
+constructs that can cause a pattern to be anchored.)
+
+A dollar sign is an assertion which is true only if the
+current matching point is at the end of the subject string,
+or immediately before a newline character that is the last
+character in the string (by default). A dollar sign need not be the
+last character of the pattern if a number of alternatives
+are involved, but it should be the last item in any branch
+in which it appears. A dollar sign has no special meaning in a
+character class.
+
+@cindex Perl-style regular expressions, multiline
+The meanings of the circumflex and dollar sign characters are
+changed if the @code{M} modifier option is used. When this is
+the case, they match immediately after and immediately
+before an internal @code{\n} character, respectively, in addition
+to matching at the start and end of the subject string. For
+example, the pattern @code{/^abc$/} matches the subject string
+@samp{def\nabc} in multiline mode, but not otherwise. Consequently,
+patterns that are anchored in single line mode
+because all branches start with @code{^} are not anchored in
+multiline mode.
+
+@cindex Perl-style regular expressions, multiline
+Note that the sequences @code{\A}, @code{\Z}, and @code{\z}
+can be used to match the start and end of the subject in both
+modes, and if all branches of a pattern start with @code{\A}
+it is always anchored, whether the @code{M} modifier is set or not.
+
+@cindex Perl-style regular expressions, single line
+Outside a character class, a dot in the pattern matches any
+one character in the subject, including a non-printing character,
+but not (by default) newline. If the @code{S} modifier is used,
+dots match newlines as well. Actually, the handling of
+dot is entirely independent of the handling of circumflex
+and dollar sign, the only relationship being that they both
+involve newline characters. Dot has no special meaning in a
+character class.
+
+@node Square brackets
+@appendixsec Square brackets
+@cindex Perl-style regular expressions, character classes
+
+An opening square bracket introduces a character class, terminated
+by a closing square bracket. A closing square bracket on its own
+is not special. If a closing square bracket is required as a
+member of the class, it should be the first data character in
+the class (after an initial circumflex, if present) or escaped with a backslash.
+
+A character class matches a single character in the subject;
+the character must be in the set of characters defined by
+the class, unless the first character in the class is a circumflex,
+in which case the subject character must not be in
+the set defined by the class. If a circumflex is actually
+required as a member of the class, ensure it is not the
+first character, or escape it with a backslash.
+
+For example, the character class [aeiou] matches any lower
+case vowel, while [^aeiou] matches any character that is not
+a lower case vowel. Note that a circumflex is just a convenient
+notation for specifying the characters which are in
+the class by enumerating those that are not. It is not an
+assertion: it still consumes a character from the subject
+string, and fails if the current pointer is at the end of
+the string.
+
+@cindex Perl-style regular expressions, case-insensitive
+When caseless matching is set, any letters in a class
+represent both their upper case and lower case versions, so
+for example, a caseless @code{[aeiou]} matches uppercase
+and lowercase @samp{A}s, and a caseless @code{[^aeiou]}
+does not match @samp{A}, whereas a case-sensitive version would.
+
+@cindex Perl-style regular expressions, single line
+@cindex Perl-style regular expressions, multiline
+The newline character is never treated in any special way in
+character classes, whatever the setting of the @code{S} and
+@code{M} options (modifiers) is. A class such as @code{[^a]} will
+always match a newline.
+
+The minus (hyphen) character can be used to specify a range
+of characters in a character class. For example, @code{[d-m]}
+matches any letter between d and m, inclusive. If a minus
+character is required in a class, it must be escaped with a
+backslash or appear in a position where it cannot be interpreted
+as indicating a range, typically as the first or last
+character in the class.
+
+It is not possible to have the literal character @code{]} as the
+end character of a range. A pattern such as @code{[W-]46]} is
+interpreted as a class of two characters (@code{W} and @code{-})
+followed by a literal string @code{46]}, so it would match
+@samp{W46]} or @samp{-46]}. However, if the @code{]} is escaped
+with a backslash it is interpreted as the end of range, so
+@code{[W-\]46]} is interpreted as a single class containing a
+range followed by two separate characters. The octal or
+hexadecimal representation of @code{]} can also be used to end a range.
+
+Ranges operate in @sc{ascii} collating sequence. They can also be
+used for characters specified numerically, for example
+@code{[\000-\037]}. If a range that includes letters is used when
+caseless matching is set, it matches the letters in either
+case. For example, a caseless @code{[W-c]} is equivalent to
+@code{[][\^_`wxyzabc]}, matched caselessly, and if character
+tables for the French locale are in use, @code{[\xc8-\xcb]}
+matches accented E characters in both cases.
+
+Unlike in @sc{posix} mode, the character types @code{\d},
+@code{\D}, @code{\s}, @code{\S}, @code{\w}, and @code{\W}
+may also appear in a character class, and add the characters
+that they match to the class. For example, @code{[\dABCDEF]} matches any
+hexadecimal digit. A circumflex can conveniently be used
+with the upper case character types to specify a more restricted
+set of characters than the matching lower case type.
+For example, the class @code{[^\W_]} matches any letter or digit,
+but not underscore.
+
+All non-alphameric characters other than @code{\}, @code{-},
+@code{^} (at the start) and the terminating @code{]}
+are non-special in character classes, but it does no harm
+if they are escaped.
+
+Perl 5.6 supports the @sc{posix} notation for character classes, which
+uses names enclosed by @code{[:} and @code{:]} within the enclosing
+square brackets, and @value{SSED} supports this notation as well.
+For example,
+
+@example
+ [01[:alpha:]%]
+@end example
+
+@noindent
+matches @samp{0}, @samp{1}, any alphabetic character, or @samp{%}.
+The supported class names are
+
+@table @code
+@item alnum
+Matches letters and digits
+
+@item alpha
+Matches letters
+
+@item ascii
+Matches character codes 0 - 127
+
+@item cntrl
+Matches control characters
+
+@item digit
+Matches decimal digits (same as \d)
+
+@item graph
+Matches printing characters, excluding space
+
+@item lower
+Matches lower case letters
+
+@item print
+Matches printing characters, including space
+
+@item punct
+Matches printing characters, excluding letters and digits
+
+@item space
+Matches white space (same as \s)
+
+@item upper
+Matches upper case letters
+
+@item word
+Matches ``word'' characters (same as \w)
+
+@item xdigit
+Matches hexadecimal digits
+@end table
+
+The names @code{ascii} and @code{word} are extensions valid only in
+Perl mode. Another Perl extension is negation, which is
+indicated by a circumflex character after the colon. For example,
+
+@example
+ [12[:^digit:]]
+@end example
+
+@noindent
+matches @samp{1}, @samp{2}, or any non-digit.
+
+@node Options setting
+@appendixsec Options setting
+@cindex Perl-style regular expressions, toggling options
+@cindex Perl-style regular expressions, case-insensitive
+@cindex Perl-style regular expressions, multiline
+@cindex Perl-style regular expressions, single line
+@cindex Perl-style regular expressions, extended
+
+The settings of the @code{I}, @code{M}, @code{S}, @code{X}
+modifiers can be changed from within the pattern by
+a sequence of Perl option letters enclosed between @code{(?}
+and @code{)}. The option letters must be lowercase.
+
+For example, @code{(?im)} sets caseless, multiline matching. It is
+also possible to unset these options by preceding the letter
+with a hyphen; you can also have combined settings and unsettings:
+@code{(?im-sx)} sets caseless and multiline matching,
+while unsetting single line matching (for dots) and extended
+whitespace interpretation. If a letter appears both before
+and after the hyphen, the option is unset.
+
+The scope of these option changes depends on where in the
+pattern the setting occurs. For settings that are outside
+any subpattern (defined below), the effect is the same as if
+the options were set or unset at the start of matching. The
+following patterns all behave in exactly the same way:
+
+@example
+ (?i)abc
+ a(?i)bc
+ ab(?i)c
+ abc(?i)
+@end example
+
+which in turn is the same as specifying the pattern abc with
+the @code{I} modifier. In other words, ``top level'' settings
+apply to the whole pattern (unless there are other
+changes inside subpatterns). If there is more than one setting
+of the same option at top level, the rightmost setting
+is used.
+
+If an option change occurs inside a subpattern, the effect
+is different. This is a change of behaviour in Perl 5.005.
+An option change inside a subpattern affects only that part
+of the subpattern @emph{that follows} it, so
+
+@example
+ (a(?i)b)c
+@end example
+
+@noindent
+matches abc and aBc and no other strings (assuming
+case-sensitive matching is used). By this means, options can
+be made to have different settings in different parts of the
+pattern. Any changes made in one alternative do carry on
+into subsequent branches within the same subpattern. For
+example,
+
+@example
+ (a(?i)b|c)
+@end example
+
+@noindent
+matches @samp{ab}, @samp{aB}, @samp{c}, and @samp{C},
+even though when matching @samp{C} the first branch is
+abandoned before the option setting.
+This is because the effects of option settings happen at
+compile time. There would be some very weird behaviour otherwise.
+
+@ignore
+There are two PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA
+that can be changed in the same way as the Perl-compatible options by
+using the characters U and X respectively. The (?X) flag
+setting is special in that it must always occur earlier in
+the pattern than any of the additional features it turns on,
+even when it is at top level. It is best put at the start.
+@end ignore
+
+
+@node Non-capturing subpatterns
+@appendixsec Non-capturing subpatterns
+@cindex Perl-style regular expressions, non-capturing subpatterns
+
+Marking part of a pattern as a subpattern does two things.
+On one hand, it localizes a set of alternatives; on the other
+hand, it sets up the subpattern as a capturing subpattern (as
+defined above). The subpattern can be backreferenced and
+referenced in the right side of @code{s} commands.
+
+For example, if the string @samp{the red king} is matched against
+the pattern
+
+@example
+ the ((red|white) (king|queen))
+@end example
+
+@noindent
+the captured substrings are @samp{red king}, @samp{red},
+and @samp{king}, and are numbered 1, 2, and 3.
+
+The fact that plain parentheses fulfil two functions is not
+always helpful. There are often times when a grouping
+subpattern is required without a capturing requirement. If an
+opening parenthesis is followed by @code{?:}, the subpattern does
+not do any capturing, and is not counted when computing the
+number of any subsequent capturing subpatterns. For example,
+if the string @samp{the white queen} is matched against the pattern
+
+@example
+ the ((?:red|white) (king|queen))
+@end example
+
+@noindent
+the captured substrings are @samp{white queen} and @samp{queen},
+and are numbered 1 and 2. The maximum number of captured
+substrings is 99, while the maximum number of all subpatterns,
+both capturing and non-capturing, is 200.
+
+As a convenient shorthand, if any option settings are
+required at the start of a non-capturing subpattern, the
+option letters may appear between the @code{?} and the
+@code{:}. Thus the two patterns
+
+@example
+ (?i:saturday|sunday)
+ (?:(?i)saturday|sunday)
+@end example
+
+@noindent
+match exactly the same set of strings. Because alternative
+branches are tried from left to right, and options are not
+reset until the end of the subpattern is reached, an option
+setting in one branch does affect subsequent branches, so
+the above patterns match @samp{SUNDAY} as well as @samp{Saturday}.
+
+
+@node Repetition
+@appendixsec Repetition
+@cindex Perl-style regular expressions, repetitions
+
+Repetition is specified by quantifiers, which can follow any
+of the following items:
+
+@itemize @bullet
+@item
+a single character, possibly escaped
+
+@item
+the @code{.} special character
+
+@item
+a character class
+
+@item
+a back reference (see next section)
+
+@item
+a parenthesized subpattern (unless it is an assertion; @pxref{Assertions})
+@end itemize
+
+The general repetition quantifier specifies a minimum and
+maximum number of permitted matches, by giving the two
+numbers in curly brackets (braces), separated by a comma.
+The numbers must be less than 65536, and the first must be
+less than or equal to the second. For example:
+
+@example
+ z@{2,4@}
+@end example
+
+@noindent
+matches @samp{zz}, @samp{zzz}, or @samp{zzzz}. A closing brace on its own
+is not a special character. If the second number is omitted,
+but the comma is present, there is no upper limit; if the
+second number and the comma are both omitted, the quantifier
+specifies an exact number of required matches. Thus
+
+@example
+ [aeiou]@{3,@}
+@end example
+
+@noindent
+matches at least 3 successive vowels, but may match many
+more, while
+
+@example
+ \d@{8@}
+@end example
+
+@noindent
+matches exactly 8 digits. An opening curly bracket that
+appears in a position where a quantifier is not allowed, or
+one that does not match the syntax of a quantifier, is taken
+as a literal character. For example, @{,6@} is not a quantifier,
+but a literal string of four characters.@footnote{It
+raises an error if @option{-R} is not used.}
+
+The quantifier @samp{@{0@}} is permitted, causing the expression to
+behave as if the previous item and the quantifier were not
+present.
+
+For convenience (and historical compatibility) the three
+most common quantifiers have single-character abbreviations:
+
+@table @code
+@item *
+is equivalent to @{0,@}
+
+@item +
+is equivalent to @{1,@}
+
+@item ?
+is equivalent to @{0,1@}
+@end table
+
+It is possible to construct infinite loops by following a
+subpattern that can match no characters with a quantifier
+that has no upper limit, for example:
+
+@example
+ (a?)*
+@end example
+
+Earlier versions of Perl used to give an error at
+compile time for such patterns. However, because there are
+cases where this can be useful, such patterns are now
+accepted, but if any repetition of the subpattern does in
+fact match no characters, the loop is forcibly broken.
+
+@cindex Greedy regular expression matching
+@cindex Perl-style regular expressions, stingy repetitions
+By default, the quantifiers are @dfn{greedy} like in @sc{posix}
+mode, that is, they match as much as possible (up to the maximum
+number of permitted times), without causing the rest of the
+pattern to fail. The classic example of where this gives problems
+is in trying to match comments in C programs. These appear between
+the sequences @code{/*} and @code{*/} and within the sequence, individual
+@code{*} and @code{/} characters may appear. An attempt to match C
+comments by applying the pattern
+
+@example
+ /\*.*\*/
+@end example
+
+@noindent
+to the string
+
+@example
+ /* first comment */ not comment /* second comment */
+@end example
+
+@noindent
+fails, because it matches the entire string owing to the
+greediness of the @code{.*} item.
+
+However, if a quantifier is followed by a question mark, it
+ceases to be greedy, and instead matches the minimum number
+of times possible, so the pattern @code{/\*.*?\*/}
+does the right thing with the C comments. The meaning of the
+various quantifiers is not otherwise changed, just the preferred
+number of matches. Do not confuse this use of question
+mark with its use as a quantifier in its own right.
+Because it has two uses, it can sometimes appear doubled, as in
+
+@example
+ \d??\d
+@end example
+
+which matches one digit by preference, but can match two if
+that is the only way the rest of the pattern matches.
+
+Note that greediness does not matter when specifying addresses,
+but can nevertheless be used to improve performance.
+
+@ignore
+ If the PCRE_UNGREEDY option is set (an option which is not
+ available in Perl), the quantifiers are not greedy by
+ default, but individual ones can be made greedy by following
+ them with a question mark. In other words, it inverts the
+ default behaviour.
+@end ignore
+
+When a parenthesized subpattern is quantified with a minimum
+repeat count that is greater than 1 or with a limited maximum,
+more store is required for the compiled pattern, in
+proportion to the size of the minimum or maximum.
+
+@cindex Perl-style regular expressions, single line
+If a pattern starts with @code{.*} or @code{.@{0,@}} and the
+@code{S} modifier is used, the pattern is implicitly anchored,
+because whatever follows will be tried against every character
+position in the subject string, so there is no point in
+retrying the overall match at any position after the first.
+PCRE treats such a pattern as though it were preceded by \A.
+
+When a capturing subpattern is repeated, the value captured
+is the substring that matched the final iteration. For example,
+after
+
+@example
+ (tweedle[dume]@{3@}\s*)+
+@end example
+
+@noindent
+has matched @samp{tweedledum tweedledee} the value of the
+captured substring is @samp{tweedledee}. However, if there are
+nested capturing subpatterns, the corresponding captured
+values may have been set in previous iterations. For example,
+after
+
+@example
+ /(a|(b))+/
+@end example
+
+matches @samp{aba}, the value of the second captured substring is
+@samp{b}.
+
+@node Backreferences
+@appendixsec Backreferences
+@cindex Perl-style regular expressions, backreferences
+
+Outside a character class, a backslash followed by a digit
+greater than 0 (and possibly further digits) is a back
+reference to a capturing subpattern earlier (i.e. to its
+left) in the pattern, provided there have been that many
+previous capturing left parentheses.
+
+However, if the decimal number following the backslash is
+less than 10, it is always taken as a back reference, and
+causes an error only if there are not that many capturing
+left parentheses in the entire pattern. In other words, the
+parentheses that are referenced need not be to the left of
+the reference for numbers less than 10. @xref{Backslash},
+for further details of the handling of digits following a backslash.
+
+A back reference matches whatever actually matched the capturing
+subpattern in the current subject string, rather than
+anything matching the subpattern itself. So the pattern
+
+@example
+ (sens|respons)e and \1ibility
+@end example
+
+@noindent
+matches @samp{sense and sensibility} and @samp{response and responsibility},
+but not @samp{sense and responsibility}. If caseful
+matching is in force at the time of the back reference, the
+case of letters is relevant. For example,
+
+@example
+ ((?i)blah)\s+\1
+@end example
+
+@noindent
+matches @samp{blah blah} and @samp{Blah Blah}, but not
+@samp{BLAH blah}, even though the original capturing
+subpattern is matched caselessly.
+
+There may be more than one back reference to the same subpattern.
+Also, if a subpattern has not actually been used in a
+particular match, any back references to it always fail. For
+example, the pattern
+
+@example
+ (a|(bc))\2
+@end example
+
+@noindent
+always fails if it starts to match @samp{a} rather than
+@samp{bc}. Because there may be up to 99 back references, all
+digits following the backslash are taken as part of a potential
+back reference number; this is different from what happens
+in @sc{posix} mode. If the pattern continues with a digit
+character, some delimiter must be used to terminate the back
+reference. If the @code{X} modifier option is set, this can be
+whitespace. Otherwise an empty comment can be used, or the
+following character can be expressed in hexadecimal or octal.
+
+A back reference that occurs inside the parentheses to which
+it refers fails when the subpattern is first used, so, for
+example, @code{(a\1)} never matches. However, such references
+can be useful inside repeated subpatterns. For example, the
+pattern
+
+@example
+ (a|b\1)+
+@end example
+
+@noindent
+matches any number of @samp{a}s and also @samp{aba}, @samp{ababbaa},
+etc. At each iteration of the subpattern, the back reference matches
+the character string corresponding to the previous iteration. In
+order for this to work, the pattern must be such that the first
+iteration does not need to match the back reference. This can be
+done using alternation, as in the example above, or by a
+quantifier with a minimum of zero.
+
+@node Assertions
+@appendixsec Assertions
+@cindex Perl-style regular expressions, assertions
+@cindex Perl-style regular expressions, asserting subpatterns
+
+An assertion is a test on the characters following or
+preceding the current matching point that does not actually
+consume any characters. The simple assertions coded as @code{\b},
+@code{\B}, @code{\A}, @code{\Z}, @code{\z}, @code{^} and @code{$}
+are described above. More complicated assertions are coded as
+subpatterns. There are two kinds: those that look ahead of the
+current position in the subject string, and those that look behind it.
+
+@cindex Perl-style regular expressions, lookahead subpatterns
+An assertion subpattern is matched in the normal way, except
+that it does not cause the current matching position to be
+changed. Lookahead assertions start with @code{(?=} for positive
+assertions and @code{(?!} for negative assertions. For example,
+
+@example
+ \w+(?=;)
+@end example
+
+@noindent
+matches a word followed by a semicolon, but does not include
+the semicolon in the match, and
+
+@example
+ foo(?!bar)
+@end example
+
+@noindent
+matches any occurrence of @samp{foo} that is not followed by
+@samp{bar}.
+
+Note that the apparently similar pattern
+
+@example
+ (?!foo)bar
+@end example
+
+@noindent
+@cindex Perl-style regular expressions, lookbehind subpatterns
+finds any occurrence of @samp{bar} even if it is preceded by
+@samp{foo}, because the assertion @code{(?!foo)} is always true
+when the next three characters are @samp{bar}. A lookbehind
+assertion is needed to achieve this effect.
+Lookbehind assertions start with @code{(?<=} for positive
+assertions and @code{(?<!} for negative assertions. So,
+
+@example
+ (?<!foo)bar
+@end example
+
+achieves the required effect of finding an occurrence of
+@samp{bar} that is not preceded by @samp{foo}. The contents of a
+lookbehind assertion are restricted
+such that all the strings it matches must have a fixed
+length. However, if there are several alternatives, they do
+not all have to have the same fixed length. This is an extension
+compared with Perl 5.005, which requires all branches to match
+the same length of string. Thus
+
+@example
+ (?<=dogs|cats|)
+@end example
+
+@noindent
+is permitted, but the apparently equivalent regular expression
+
+@example
+ (?<!dogs?|cats?)
+@end example
+
+@noindent
+causes an error at compile time. Branches that match different
+length strings are permitted only at the top level of
+a lookbehind assertion: an assertion such as
+
+@example
+ (?<=ab(c|de))
+@end example
+
+@noindent
+is not permitted, because its single top-level branch can
+match two different lengths, but it is acceptable if rewritten
+to use two top-level branches:
+
+@example
+ (?<=abc|abde)
+@end example
+
+All this is required because lookbehind assertions simply
+move the current position back by the alternative's fixed
+width and then try to match. If there are
+insufficient characters before the current position, the
+match is deemed to fail. Lookbehinds, in conjunction with
+non-backtracking subpatterns, can be particularly useful for
+matching at the ends of strings; an example is given at the end
+of the section on non-backtracking subpatterns.
+
+Several assertions (of any sort) may occur in succession.
+For example,
+
+@example
+ (?<=\d@{3@})(?<!999)foo
+@end example
+
+@noindent
+matches @samp{foo} preceded by three digits that are not @samp{999}.
+Notice that each of the assertions is applied independently
+at the same point in the subject string. First there is a
+check that the previous three characters are all digits, and
+then there is a check that the same three characters are not
+@samp{999}. This pattern does not match @samp{foo} preceded by six
+characters, the first three of which are digits and the last three
+of which are not @samp{999}. For example, it doesn't match
+@samp{123abcfoo}. A pattern to do that is
+
+@example
+ (?<=\d@{3@}...)(?<!999)foo
+@end example
+
+@noindent
+This time the first assertion looks at the preceding six
+characters, checking that the first three are digits, and
+then the second assertion checks that the preceding three
+characters are not @samp{999}. Actually, assertions can be
+nested in any combination, so one can write this as
+
+@example
+ (?<=\d@{3@}(?!999)...)foo
+@end example
+
+or
+
+@example
+ (?<=\d@{3@}...(?<!999))foo
+@end example
+
+@noindent
+both of which might be considered more readable.
+
+Assertion subpatterns are not capturing subpatterns, and may
+not be repeated, because it makes no sense to assert the
+same thing several times. If any kind of assertion contains
+capturing subpatterns within it, these are counted for the
+purposes of numbering the capturing subpatterns in the whole
+pattern. However, substring capturing is carried out only
+for positive assertions, because it does not make sense for
+negative assertions.
+
+Assertions count towards the maximum of 200 parenthesized
+subpatterns.
+
+@node Non-backtracking subpatterns
+@appendixsec Non-backtracking subpatterns
+@cindex Perl-style regular expressions, non-backtracking subpatterns
+
+With both maximizing and minimizing repetition, failure of
+what follows normally causes the repeated item to be evaluated
+again to see if a different number of repeats allows the
+rest of the pattern to match. Sometimes it is useful to
+prevent this, either to change the nature of the match, or
+to cause it to fail earlier than it otherwise might, when the
+author of the pattern knows there is no point in carrying
+on.
+
+Consider, for example, the pattern @code{\d+foo} when applied to
+the subject line
+
+@example
+ 123456bar
+@end example
+
+After matching all 6 digits and then failing to match @samp{foo},
+the normal action of the matcher is to try again with only 5
+digits matching the @code{\d+} item, and then with 4, and so on,
+before ultimately failing. Non-backtracking subpatterns
+provide the means for specifying that once a portion of the
+pattern has matched, it is not to be re-evaluated in this way,
+so the matcher would give up immediately on failing to match
+@samp{foo} the first time. The notation is another kind of special
+parenthesis, starting with @code{(?>} as in this example:
+
+@example
+ (?>\d+)foo
+@end example
+
+This kind of parenthesis ``locks up'' the part of the pattern
+it contains once it has matched, and a failure further into
+the pattern is prevented from backtracking into it.
+Backtracking past it to previous items, however, works as
+normal.
+
+Non-backtracking subpatterns are not capturing subpatterns. Simple
+cases such as the above example can be thought of as a maximizing
+repeat that must swallow everything it can. So,
+while both @code{\d+} and @code{\d+?} are prepared to adjust the number of
+digits they match in order to make the rest of the pattern
+match, @code{(?>\d+)} can only match an entire sequence of digits.
+
+This construction can of course contain arbitrarily complicated
+subpatterns, and it can be nested.
+
+@cindex Perl-style regular expressions, lookbehind subpatterns
+Non-backtracking subpatterns can be used in conjunction with look-behind
+assertions to specify efficient matching at the end
+of the subject string. Consider a simple pattern such as
+
+@example
+ abcd$
+@end example
+
+@noindent
+when applied to a long string which does not match. Because
+matching proceeds from left to right, @command{sed} will look for
+each @samp{a} in the subject and then see if what follows matches
+the rest of the pattern. If the pattern is specified as
+
+@example
+ ^.*abcd$
+@end example
+
+@noindent
+the initial @code{.*} matches the entire string at first, but when
+this fails (because there is no following @samp{a}), it backtracks
+to match all but the last character, then all but the
+last two characters, and so on. Once again the search for
+@samp{a} covers the entire string, from right to left, so we are
+no better off. However, if the pattern is written as
+
+@example
+ ^(?>.*)(?<=abcd)
+@end example
+
+there can be no backtracking for the @code{.*} item; it can match
+only the entire string. The subsequent lookbehind assertion
+does a single test on the last four characters. If it fails,
+the match fails immediately. For long strings, this approach
+makes a significant difference to the processing time.
+
+When a pattern contains an unlimited repeat inside a subpattern
+that can itself be repeated an unlimited number of
+times, the use of a non-backtracking subpattern is the only way to
+avoid some failing matches taking a very long time
+indeed.@footnote{Actually, the matcher embedded in @value{SSED}
+ tries to do something for this in the simplest cases,
+ like @code{([^b]*b)*}. These cases are actually quite
+ common: they happen for example in a regular expression
+ like @code{\/\*([^*]*\*)*\/} which matches C comments.}
+
+The pattern
+
+@example
+ (\D+|<\d+>)*[!?]
+@end example
+
+@c ([^0-9<]+<(\d+>)?)*[!?]
+
+@noindent
+matches an unlimited number of substrings that either consist
+of non-digits, or digits enclosed in angular brackets, followed by
+an exclamation or question mark. When it matches, it runs quickly.
+However, if it is applied to
+
+@example
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@end example
+
+@noindent
+it takes a long time before reporting failure. This is
+because the string can be divided between the two repeats in
+a large number of ways, and all have to be tried.@footnote{The
+example used @code{[!?]} rather than a single character at the end,
+because both @value{SSED} and Perl have an optimization that allows
+for fast failure when a single character is used. They
+remember the last single character that is required for a
+match, and fail early if it is not present in the string.}
+
+If the pattern is changed to
+
+@example
+ ((?>\D+)|<\d+>)*[!?]
+@end example
+
+sequences of non-digits cannot be broken, and failure happens
+quickly.
+
+@node Conditional subpatterns
+@appendixsec Conditional subpatterns
+@cindex Perl-style regular expressions, conditional subpatterns
+
+It is possible to cause the matching process to obey a subpattern
+conditionally or to choose between two alternative
+subpatterns, depending on the result of an assertion, or
+whether a previous capturing subpattern matched or not. The
+two possible forms of conditional subpattern are
+
+@example
+ (?(@var{condition})@var{yes-pattern})
+ (?(@var{condition})@var{yes-pattern}|@var{no-pattern})
+@end example
+
+If the condition is satisfied, the yes-pattern is used; otherwise
+the no-pattern (if present) is used. If there are more than two
+alternatives in the subpattern, a compile-time error occurs.
+
+There are two kinds of condition. If the text between the
+parentheses consists of a sequence of digits, the condition
+is satisfied if the capturing subpattern of that number has
+previously matched. The number must be greater than zero.
+Consider the following pattern, which contains non-significant
+white space to make it more readable (assume the @code{X} modifier)
+and to divide it into three parts for ease of discussion:
+
+@example
+ ( \( )? [^()]+ (?(1) \) )
+@end example
+
+The first part matches an optional opening parenthesis, and
+if that character is present, sets it as the first captured
+substring. The second part matches one or more characters
+that are not parentheses. The third part is a conditional
+subpattern that tests whether the first set of parentheses
+matched or not. If they did, that is, if the subject started
+with an opening parenthesis, the condition is true, and so
+the yes-pattern is executed and a closing parenthesis is
+required. Otherwise, since no-pattern is not present, the
+subpattern matches nothing. In other words, this pattern
+matches a sequence of non-parentheses, optionally enclosed
+in parentheses.
+
+@cindex Perl-style regular expressions, lookahead subpatterns
+If the condition is not a sequence of digits, it must be an
+assertion. This may be a positive or negative lookahead or
+lookbehind assertion. Consider this pattern, again containing
+non-significant white space, and with the two alternatives
+on the second and third lines:
+
+@example
+ (?(?=...[a-z])
+ \d\d-[a-z]@{3@}-\d\d |
+ \d\d-\d\d-\d\d )
+@end example
+
+The condition is a positive lookahead assertion that matches
+a letter that is three characters away from the current point.
+If a letter is found, the subject is matched against the first
+alternative @samp{@var{dd}-@var{aaa}-@var{dd}} (where @var{aaa} are
+letters and @var{dd} are digits); otherwise it is matched against
+the second alternative, @samp{@var{dd}-@var{dd}-@var{dd}}.
+
+
+@node Recursive patterns
+@appendixsec Recursive patterns
+@cindex Perl-style regular expressions, recursive patterns
+@cindex Perl-style regular expressions, recursion
+
+Consider the problem of matching a string in parentheses,
+allowing for unlimited nested parentheses. Without the use
+of recursion, the best that can be done is to use a pattern
+that matches up to some fixed depth of nesting. It is not
+possible to handle an arbitrary nesting depth. Perl 5.6 has
+provided an experimental facility that allows regular
+expressions to recurse (amongst other things). It does this
+by interpolating Perl code in the expression at run time,
+and the code can refer to the expression itself. A Perl pattern
+to solve the parentheses problem can be created like
+this:
+
+@example
+ $re = qr@{\( (?: (?>[^()]+) | (?p@{$re@}) )* \)@}x;
+@end example
+
+The @code{(?p@{...@})} item interpolates Perl code at run time,
+and in this case refers recursively to the pattern in which it
+appears. Obviously, @command{sed} cannot support the interpolation of
+Perl code. Instead, the special item @code{(?R)} is provided for
+the specific case of recursion. This pattern solves the
+parentheses problem (assume the @code{X} modifier option is used
+so that white space is ignored):
+
+@example
+ \( ( (?>[^()]+) | (?R) )* \)
+@end example
+
+First it matches an opening parenthesis. Then it matches any
+number of substrings which can either be a sequence of
+non-parentheses, or a recursive match of the pattern itself
+(i.e. a correctly parenthesized substring). Finally there is
+a closing parenthesis.
+
+This particular example pattern contains nested unlimited
+repeats, and so the use of a non-backtracking subpattern for
+matching strings of non-parentheses is important when applying
+the pattern to strings that do not match. For example, when
+it is applied to
+
+@example
+ (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+@end example
+
+it yields a ``no match'' response quickly. However, if a
+non-backtracking subpattern is not used, the match runs
+for a very long time indeed because there are so many different
+ways the @code{+} and @code{*} repeats can carve up the subject,
+and all have to be tested before failure can be reported.
+
+The values set for any capturing subpatterns are those from
+the outermost level of the recursion at which the subpattern
+value is set. If the pattern above is matched against
+
+@example
+ (ab(cd)ef)
+@end example
+
+@noindent
+the value for the capturing parentheses is @samp{ef}, which is
+the last value taken on at the top level.
+
+@node Comments
+@appendixsec Comments
+@cindex Perl-style regular expressions, comments
+
+The sequence @code{(?#} marks the start of a comment which continues
+up to the next closing parenthesis. Nested parentheses
+are not permitted. The characters that make up a comment
+play no part in the pattern matching at all.
+
+@cindex Perl-style regular expressions, extended
+If the @code{X} modifier option is used, an unescaped @code{#} character
+outside a character class introduces a comment that continues
+up to the next newline character in the pattern.
+@end ifset
+
+
+@page
+@node Concept Index
+@unnumbered Concept Index
+
+This is a general index of all issues discussed in this manual, with the
+exception of the @command{sed} commands and command-line options.
+
+@printindex cp
+
+@page
+@node Command and Option Index
+@unnumbered Command and Option Index
+
+This is an alphabetical list of all @command{sed} commands and command-line
+options.
+
+@printindex fn
+
+@contents
+@bye
+
+@c XXX FIXME: the term "cycle" is never defined...
diff --git a/doc/sed.x b/doc/sed.x
new file mode 100644
index 0000000..b25e60e
--- /dev/null
+++ b/doc/sed.x
@@ -0,0 +1,313 @@
+.SH NAME
+sed \- a Stream EDitor
+.SH SYNOPSIS
+.nf
+sed [-V] [--version] [--help] [-n] [--quiet] [--silent]
+ [-l N] [--line-length=N] [-u] [--unbuffered]
+ [-r] [--regexp-extended]
+ [-e script] [--expression=script]
+ [-f script-file] [--file=script-file]
+ [script-if-no-other-script]
+ [file...]
+.fi
+[DESCRIPTION]
+.ds sd \fIsed\fP
+.ds Sd \fISed\fP
+\*(Sd is a stream editor.
+A stream editor is used to perform basic text
+transformations on an input stream
+(a file or input from a pipeline).
+While in some ways similar to an editor which
+permits scripted edits (such as \fIed\fP),
+\*(sd works by making only one pass over the
+input(s), and is consequently more efficient.
+But it is \*(sd's ability to filter text in a pipeline
+which particularly distinguishes it from other types of
+editors.
+
+[COMMAND SYNOPSIS]
+This is just a brief synopsis of \*(sd commands to serve as
+a reminder to those who already know \*(sd;
+other documentation (such as the texinfo document)
+must be consulted for fuller descriptions.
+.SS
+Zero-address ``commands''
+.TP
+.RI :\ label
+Label for
+.B b
+and
+.B t
+commands.
+.TP
+.RI # comment
+The comment extends until the next newline (or the end of a
+.B -e
+script fragment).
+.TP
+}
+The closing bracket of a { } block.
+.SS
+Zero- or One- address commands
+.TP
+=
+Print the current line number.
+.TP
+a \e
+.TP
+.I text
+Append
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+i \e
+.TP
+.I text
+Insert
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+q
+Immediately quit the \*(sd script without processing
+any more input,
+except that if auto-print is not disabled
+the current pattern space will be printed.
+.TP
+Q
+Immediately quit the \*(sd script without processing
+any more input.
+.TP
+.RI r\ filename
+Append text read from
+.IR filename .
+.TP
+.RI R\ filename
+Append a line read from
+.IR filename .
+.SS
+Commands which accept address ranges
+.TP
+{
+Begin a block of commands (end with a }).
+.TP
+.RI b\ label
+Branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+.RI t\ label
+If a s/// has done a successful substitution since the
+last input line was read and since the last t or T
+command, then branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+.RI T\ label
+If no s/// has done a successful substitution since the
+last input line was read and since the last t or T
+command, then branch to
+.IR label ;
+if
+.I label
+is omitted, branch to end of script.
+.TP
+c \e
+.TP
+.I text
+Replace the selected lines with
+.IR text ,
+which has each embedded newline preceded by a backslash.
+.TP
+d
+Delete pattern space.
+Start next cycle.
+.TP
+D
+Delete up to the first embedded newline in the pattern space.
+Start next cycle, but skip reading from the input
+if there is still data in the pattern space.
+.TP
+h H
+Copy/append pattern space to hold space.
+.TP
+g G
+Copy/append hold space to pattern space.
+.TP
+x
+Exchange the contents of the hold and pattern spaces.
+.TP
+l
+List out the current line in a ``visually unambiguous'' form.
+.TP
+n N
+Read/append the next line of input into the pattern space.
+.TP
+p
+Print the current pattern space.
+.TP
+P
+Print up to the first embedded newline of the current pattern space.
+.TP
+.RI s/ regexp / replacement /
+Attempt to match
+.I regexp
+against the pattern space.
+If successful, replace that portion matched
+with
+.IR replacement .
+The
+.I replacement
+may contain the special character
+.B &
+to refer to that portion of the pattern space which matched,
+and the special escapes \e1 through \e9 to refer to the
+corresponding matching sub-expressions in the
+.IR regexp .
+.TP
+.RI w\ filename
+Write the current pattern space to
+.IR filename .
+.TP
+.RI W\ filename
+Write the first line of the current pattern space to
+.IR filename .
+.TP
+.RI y/ source / dest /
+Transliterate the characters in the pattern space which appear in
+.I source
+to the corresponding character in
+.IR dest .
+.SH
+Addresses
+\*(Sd commands can be given with no addresses, in which
+case the command will be executed for all input lines;
+with one address, in which case the command will only be executed
+for input lines which match that address; or with two
+addresses, in which case the command will be executed
+for all input lines which match the inclusive range of
+lines starting from the first address and continuing to
+the second address.
+Three things to note about address ranges:
+the syntax is
+.IR addr1 , addr2
+(i.e., the addresses are separated by a comma);
+the line which
+.I addr1
+matched will always be accepted,
+even if
+.I addr2
+selects an earlier line;
+and if
+.I addr2
+is a
+.IR regexp ,
+it will not be tested against the line that
+.I addr1
+matched.
+.PP
+After the address (or address-range),
+and before the command, a
+.B !
+may be inserted,
+which specifies that the command shall only be
+executed if the address (or address-range) does
+.B not
+match.
+.PP
+The following address types are supported:
+.TP
+.I number
+Match only the specified line
+.IR number .
+.TP
+.IR first ~ step
+Match every
+.IR step 'th
+line starting with line
+.IR first .
+For example, ``sed -n 1~2p'' will print all the odd-numbered lines in
+the input stream, and the address 2~5 will match every fifth line,
+starting with the second. (This is an extension.)
+.TP
+$
+Match the last line.
+.TP
+.RI / regexp /
+Match lines matching the regular expression
+.IR regexp .
+.TP
+.BI \fR\e\fPc regexp c
+Match lines matching the regular expression
+.IR regexp .
+The
+.B c
+may be any character.
+.PP
+GNU \*(sd also supports some special 2-address forms:
+.TP
+.RI 0, addr2
+Start out in "matched first address" state, until
+.I addr2
+is found.
+This is similar to
+.RI 1, addr2 ,
+except that if
+.I addr2
+matches the very first line of input the
+.RI 0, addr2
+form will be at the end of its range, whereas the
+.RI 1, addr2
+form will still be at the beginning of its range.
+.TP
+.IR addr1 ,+ N
+Will match
+.I addr1
+and the
+.I N
+lines following
+.IR addr1 .
+.TP
+.IR addr1 ,~ N
+Will match
+.I addr1
+and the lines following
+.I addr1
+until the next line whose input line number is a multiple of
+.IR N .
+
+[REGULAR EXPRESSIONS]
+POSIX.2 BREs
+.I should
+be supported, but they aren't completely because of performance
+problems.
+The
+.B \en
+sequence in a regular expression matches the newline character,
+and similarly for
+.BR \ea ,
+.BR \et ,
+and other sequences.
+
+[SEE ALSO]
+.BR awk (1),
+.BR ed (1),
+.BR grep (1),
+.BR tr (1),
+.BR perlre (1),
+sed.info,
+any of various books on \*(sd,
+.na
+the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.html),
+http://sed.sf.net/grabbag/.
+
+[BUGS]
+.PP
+E-mail bug reports to
+.BR bonzini@gnu.org .
+Be sure to include the word ``sed'' somewhere in the ``Subject:'' field.
+Also, please include the output of ``sed --version'' in the body
+of your report if at all possible.
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000..d3a153f
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,16 @@
+## Process this file with automake to produce Makefile.in
+
+## libsed.a is a build-time helper archive; nothing here is installed.
+noinst_LIBRARIES = libsed.a
+
+## Headers that belong to the library but are not installed.
+noinst_HEADERS = \
+  getopt.h \
+  utils.h \
+  obstack.h \
+  regex_.h \
+  regex_internal.h \
+  strverscmp.h \
+  stdbool_.h
+
+## Sources that are always compiled into libsed.a.
+libsed_a_SOURCES = \
+  getopt1.c \
+  getopt.c \
+  utils.c
+
+## Extra files to ship in the distribution tarball.
+EXTRA_DIST = \
+  memmove.c \
+  strerror.c \
+  regcomp.c \
+  regexec.c \
+  regex_internal.c
+
+## Header search path for everything compiled in this directory.
+AM_CPPFLAGS = \
+  -I$(top_srcdir)/lib \
+  -I$(top_srcdir)/intl \
+  -I$(top_srcdir) \
+  -I$(top_builddir)/lib
+
+## Replacement objects chosen by configure are added to the archive,
+## and the archive is rebuilt whenever any of them changes.
+libsed_a_LIBADD = @LIBOBJS@ @ALLOCA@
+libsed_a_DEPENDENCIES = $(libsed_a_LIBADD)
+
+## Removed by "make distclean".
+DISTCLEANFILES = regex.h stdbool.h
diff --git a/lib/alloca.c b/lib/alloca.c
new file mode 100644
index 0000000..c1699c4
--- /dev/null
+++ b/lib/alloca.c
@@ -0,0 +1,504 @@
+/* alloca.c -- allocate automatically reclaimed memory
+ (Mostly) portable public-domain implementation -- D A Gwyn
+
+ This implementation of the PWB library alloca function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+ J.Otto Tennant <jot@cray.com> contributed the Cray support.
+
+ There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef emacs
+#include "blockinput.h"
+#endif
+
+/* If compiling with GCC 2, this file's not needed. */
+#if !defined (__GNUC__) || __GNUC__ < 2
+
+/* If someone has defined alloca as a macro,
+ there must be some other way alloca is supposed to work. */
+#ifndef alloca
+
+#ifdef emacs
+#ifdef static
+/* actually, only want this if static is defined as ""
+ -- this is for usg, in which emacs must undefine static
+ in order to make unexec workable
+ */
+#ifndef STACK_DIRECTION
+you
+lose
+-- must know STACK_DIRECTION at compile-time
+#endif /* STACK_DIRECTION undefined */
+#endif /* static */
+#endif /* emacs */
+
+/* If your stack is a linked list of frames, you have to
+ provide an "address metric" ADDRESS_FUNCTION macro. */
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+/* Forward declaration of the Cray stack-measure helper.  It has to be
+   `static' so that it agrees with the definitions of i00afunc further
+   down in this file, both of which have internal linkage; declaring it
+   without `static' first gives the name conflicting linkage.  */
+static long i00afunc ();
+/* Map the address of ARG to a linearized stack measure.  */
+#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
+#else
+/* On ordinary stacks the address of ARG is itself the measure.  */
+#define ADDRESS_FUNCTION(arg) &(arg)
+#endif
+
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Different portions of Emacs need to call different versions of
+ malloc. The Emacs executable needs alloca to call xmalloc, because
+ ordinary malloc isn't protected from input signals. On the other
+ hand, the utilities in lib-src need alloca to call malloc; some of
+ them are very simple, and don't have an xmalloc routine.
+
+ Non-Emacs programs expect this to call xmalloc.
+
+ Callers below should use malloc. */
+
+#ifndef emacs
+#define malloc xmalloc
+#endif
+extern pointer malloc ();
+
+/* Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+
+#ifndef STACK_DIRECTION
+#define STACK_DIRECTION 0 /* Direction unknown. */
+#endif
+
+#if STACK_DIRECTION != 0
+
+#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
+
+#else /* STACK_DIRECTION == 0; need run-time code. */
+
+static int stack_dir; /* 1 or -1 once known. */
+#define STACK_DIR stack_dir
+
+/* Work out at run time which way the stack grows and record the
+   answer in stack_dir: 1 if the inner frame's probe lies at a higher
+   address than the outer frame's, -1 otherwise.  The function calls
+   itself exactly once so that the two frames can be compared.  */
+
+static void
+find_stack_direction ()
+{
+  static char *outer = NULL;    /* Address of the outer call's `probe'.  */
+  auto char probe;              /* Its address samples the stack.  */
+
+  if (outer != NULL)
+    {
+      /* Inner call: compare this frame against the remembered one.  */
+      stack_dir = (ADDRESS_FUNCTION (probe) > outer) ? 1 : -1;
+      return;
+    }
+
+  /* Outer call: remember where we are, then descend one level.  */
+  outer = ADDRESS_FUNCTION (probe);
+  find_stack_direction ();
+}
+
+#endif /* STACK_DIRECTION == 0 */
+
+/* An "alloca header" is used to:
+ (a) chain together all alloca'ed blocks;
+ (b) keep track of stack depth.
+
+ It is very important that sizeof(header) agree with malloc
+ alignment chunk size. The following default should work okay. */
+
+#ifndef ALIGN_SIZE
+#define ALIGN_SIZE sizeof(double)
+#endif
+
+/* Bookkeeping record that alloca stores immediately in front of every
+   block it hands out.  Being a union with `align', the record is at
+   least ALIGN_SIZE bytes long.  */
+typedef union hdr
+{
+ char align[ALIGN_SIZE]; /* To force sizeof(header). */
+ struct
+ {
+ union hdr *next; /* Next older header in the chain (or NULL). */
+ char *deep; /* Stack depth measure taken when the block was made. */
+ } h;
+} header;
+
+/* Head of the chain: the most recently allocated block, NULL if none. */
+static header *last_alloca_header = NULL; /* -> last alloca header. */
+
+/* Return a pointer to at least SIZE bytes of storage,
+   which will be automatically reclaimed upon exit from
+   the procedure that called alloca.  Originally, this space
+   was supposed to be taken from the current stack frame of the
+   caller, but that method cannot be made to work for some
+   implementations of C, for example under Gould's UTX/32.
+
+   Every call first frees the blocks that were recorded from deeper
+   stack frames than the current one.  alloca (0) performs only that
+   collection and returns NULL.  The process is aborted if the
+   allocator returns a null pointer, or if SIZE is so large that
+   adding the header size to it would wrap around.  */
+
+pointer
+alloca (size)
+     unsigned size;
+{
+  auto char probe;              /* Probes stack depth: */
+  register char *depth = ADDRESS_FUNCTION (probe);
+
+#if STACK_DIRECTION == 0
+  if (STACK_DIR == 0)           /* Unknown growth direction.  */
+    find_stack_direction ();
+#endif
+
+  /* Reclaim garbage, defined as all alloca'd storage that
+     was allocated from deeper in the stack than currently.  */
+
+  {
+    register header *hp;        /* Traverses linked list.  */
+
+#ifdef emacs
+    BLOCK_INPUT;
+#endif
+
+    /* The chain is ordered newest first, so the walk can stop at the
+       first header that is not deeper than the current frame.  */
+    for (hp = last_alloca_header; hp != NULL;)
+      if ((STACK_DIR > 0 && hp->h.deep > depth)
+          || (STACK_DIR < 0 && hp->h.deep < depth))
+        {
+          register header *np = hp->h.next;
+
+          free ((pointer) hp);  /* Collect garbage.  */
+
+          hp = np;              /* -> next header.  */
+        }
+      else
+        break;                  /* Rest are not deeper.  */
+
+    last_alloca_header = hp;    /* -> last valid storage.  */
+
+#ifdef emacs
+    UNBLOCK_INPUT;
+#endif
+  }
+
+  if (size == 0)
+    return NULL;                /* No allocation required.  */
+
+  /* Refuse requests for which header + SIZE would wrap around; the
+     allocator would otherwise be asked for a block that is too small
+     and the caller would write past its end.  */
+
+  if (sizeof (header) + size < sizeof (header))
+    abort ();
+
+  /* Allocate combined header + user data storage.  */
+
+  {
+    register pointer new = malloc (sizeof (header) + size);
+    /* Address of header.  */
+
+    if (new == 0)
+      abort();
+
+    ((header *) new)->h.next = last_alloca_header;
+    ((header *) new)->h.deep = depth;
+
+    last_alloca_header = (header *) new;
+
+    /* User storage begins just after header.  */
+
+    return (pointer) ((char *) new + sizeof (header));
+  }
+}
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+
+#ifdef DEBUG_I00AFUNC
+#include <stdio.h>
+#endif
+
+#ifndef CRAY_STACK
+#define CRAY_STACK
+#ifndef CRAY2
+/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
+/* Four 32-bit bit-fields describing the stack as a whole.  Nothing
+   else in alloca.c refers to this type.  */
+struct stack_control_header
+ {
+ long shgrow:32; /* Number of times stack has grown. */
+ long shaseg:32; /* Size of increments to stack. */
+ long shhwm:32; /* High water mark of stack. */
+ long shsize:32; /* Current size of stack (all segments). */
+ };
+
+/* The stack segment linkage control information occurs at
+ the high-address end of a stack segment. (The stack
+ grows from low addresses to high addresses.) The initial
+ part of the stack segment linkage control information is
+ 0200 (octal) words. This provides for register storage
+ for the routine which overflows the stack.
+
+ i00afunc reads only the `sssize' and `sspseg' members. */
+
+struct stack_segment_linkage
+ {
+ long ss[0200]; /* 0200 overflow words. */
+ long sssize:32; /* Number of words in this segment. */
+ long ssbase:32; /* Offset to stack base. */
+ long:32; /* Unnamed 32-bit field. */
+ long sspseg:32; /* Offset to linkage control of previous
+ segment of stack. */
+ long:32; /* Unnamed 32-bit field. */
+ long sstcpt:32; /* Pointer to task common address block. */
+ long sscsnm; /* Private control structure number for
+ microtasking. */
+ long ssusr1; /* Reserved for user. */
+ long ssusr2; /* Reserved for user. */
+ long sstpid; /* Process ID for pid based multi-tasking. */
+ long ssgvup; /* Pointer to multitasking thread giveup. */
+ long sscray[7]; /* Reserved for Cray Research. */
+ /* NOTE(review): ssa0..ssa7 and sss0..sss7 are presumably save slots
+ for the A and S registers -- TODO confirm. */
+ long ssa0;
+ long ssa1;
+ long ssa2;
+ long ssa3;
+ long ssa4;
+ long ssa5;
+ long ssa6;
+ long ssa7;
+ long sss0;
+ long sss1;
+ long sss2;
+ long sss3;
+ long sss4;
+ long sss5;
+ long sss6;
+ long sss7;
+ };
+
+#else /* CRAY2 */
+/* The following structure defines the vector of words
+ returned by the STKSTAT library routine.
+ i00afunc uses only `current_address' and `current_size'. */
+struct stk_stat
+ {
+ long now; /* Current total stack size. */
+ long maxc; /* Amount of contiguous space which would
+ be required to satisfy the maximum
+ stack demand to date. */
+ long high_water; /* Stack high-water mark. */
+ long overflows; /* Number of stack overflow ($STKOFEN) calls. */
+ long hits; /* Number of internal buffer hits. */
+ long extends; /* Number of block extensions. */
+ long stko_mallocs; /* Block allocations by $STKOFEN. */
+ long underflows; /* Number of stack underflow calls ($STKRETN). */
+ long stko_free; /* Number of deallocations by $STKRETN. */
+ long stkm_free; /* Number of deallocations by $STKMRET. */
+ long segments; /* Current number of stack segments. */
+ long maxs; /* Maximum number of stack segments so far. */
+ long pad_size; /* Stack pad size. */
+ long current_address; /* Current stack segment address. */
+ long current_size; /* Current stack segment size. This
+ number is actually corrupted by STKSTAT to
+ include the fifteen word trailer area. */
+ long initial_address; /* Address of initial segment. */
+ long initial_size; /* Size of initial segment. */
+ };
+
+/* The following structure describes the data structure which trails
+ any stack segment. I think that the description in 'asdef' is
+ out of date. I only describe the parts that I am sure about.
+
+ The structure has fifteen `long' members, matching the fifteen
+ words that i00afunc subtracts to locate it. i00afunc reads only
+ `this_address', `this_size' and `link'. */
+
+struct stk_trailer
+ {
+ long this_address; /* Address of this block. */
+ long this_size; /* Size of this block (does not include
+ this trailer). */
+ long unknown2;
+ long unknown3;
+ long link; /* Address of trailer block of previous
+ segment. */
+ long unknown5;
+ long unknown6;
+ long unknown7;
+ long unknown8;
+ long unknown9;
+ long unknown10;
+ long unknown11;
+ long unknown12;
+ long unknown13;
+ long unknown14;
+ };
+
+#endif /* CRAY2 */
+#endif /* not CRAY_STACK */
+
+#ifdef CRAY2
+/* Determine a "stack measure" for an arbitrary ADDRESS (CRAY-2 version).
+ The result is the offset of ADDRESS, counted in `long' words, from the
+ start of the stack segment found to contain it, plus the recorded
+ sizes of every segment reached from there through the trailer links.
+ The process is aborted if the segment chain looks corrupt: a null
+ trailer at the start, a block with a null address or zero size, or a
+ predecessor whose size is not positive.
+ I doubt that "lint" will like this much. */
+
+static long
+i00afunc (long *address)
+{
+ struct stk_stat status;
+ struct stk_trailer *trailer;
+ long *block, size;
+ long result = 0;
+
+ /* We want to iterate through all of the segments. The first
+ step is to get the stack status structure. We could do this
+ more quickly and more directly, perhaps, by referencing the
+ $LM00 common block, but I know that this works. */
+
+ STKSTAT (&status);
+
+ /* Set up the iteration. The trailer occupies the last fifteen
+ words of the current segment; current_size already includes it. */
+
+ trailer = (struct stk_trailer *) (status.current_address
+ + status.current_size
+ - 15);
+
+ /* There must be at least one stack segment. Therefore it is
+ a fatal error if "trailer" is null. */
+
+ if (trailer == 0)
+ abort ();
+
+ /* Discard segments that do not contain our argument address. */
+
+ while (trailer != 0)
+ {
+ block = (long *) trailer->this_address;
+ size = trailer->this_size;
+ if (block == 0 || size == 0)
+ abort ();
+ /* Step to the previous segment before testing, so that after the
+ break `trailer' already designates the predecessor of the
+ segment that matched. */
+ trailer = (struct stk_trailer *) trailer->link;
+ if ((block <= address) && (address < (block + size)))
+ break;
+ }
+
+ /* Set the result to the offset in this segment and add the sizes
+ of all predecessor segments. */
+
+ result = address - block;
+
+ /* No predecessors left: the offset alone is the answer. */
+ if (trailer == 0)
+ {
+ return result;
+ }
+
+ do
+ {
+ if (trailer->this_size <= 0)
+ abort ();
+ result += trailer->this_size;
+ trailer = (struct stk_trailer *) trailer->link;
+ }
+ while (trailer != 0);
+
+ /* We are done. Note that if you present a bogus address (one
+ not in any segment), you will get a different number back, formed
+ from subtracting the address of the first block. This is probably
+ not what you want. */
+
+ return (result);
+}
+
+#else /* not CRAY2 */
+/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
+   Determine the number of the cell within the stack,
+   given the address of the cell.  The purpose of this
+   routine is to linearize, in some sense, stack addresses
+   for alloca.
+
+   ADDRESS is the address of the cell.  The result is the offset of
+   ADDRESS from the start of the segment found to contain it (or of
+   the oldest segment examined, if none does), plus the sizes of all
+   earlier segments reached through the `sspseg' links.  */
+
+static long
+i00afunc (long address)
+{
+  long stkl = 0;
+
+  long size, pseg, this_segment;
+  long result = 0;
+
+  struct stack_segment_linkage *ssptr;
+
+  /* Register B67 contains the address of the end of the
+     current stack segment.  If you (as a subprogram) store
+     your registers on the stack and find that you are past
+     the contents of B67, you have overflowed the segment.
+
+     B67 also points to the stack segment linkage control
+     area, which is what we are really interested in.  */
+
+  stkl = CRAY_STACKSEG_END ();
+  ssptr = (struct stack_segment_linkage *) stkl;
+
+  /* If one subtracts 'size' from the end of the segment,
+     one has the address of the first word of the segment.
+
+     If this is not the first segment, 'pseg' will be
+     nonzero.  */
+
+  pseg = ssptr->sspseg;
+  size = ssptr->sssize;
+
+  this_segment = stkl - size;
+
+  /* It is possible that calling this routine itself caused
+     a stack overflow.  Discard stack segments which do not
+     contain the target address.  */
+
+  while (!(this_segment <= address && address <= stkl))
+    {
+#ifdef DEBUG_I00AFUNC
+      /* The arguments are `long', so the conversions need the `l'
+         length modifier.  */
+      fprintf (stderr, "%011lo %011lo %011lo\n", this_segment, address, stkl);
+#endif
+      if (pseg == 0)
+        break;
+      stkl = stkl - pseg;
+      ssptr = (struct stack_segment_linkage *) stkl;
+      size = ssptr->sssize;
+      pseg = ssptr->sspseg;
+      this_segment = stkl - size;
+    }
+
+  result = address - this_segment;
+
+  /* If you subtract pseg from the current end of the stack,
+     you get the address of the previous stack segment's end.
+     This seems a little convoluted to me, but I'll bet you save
+     a cycle somewhere.  */
+
+  while (pseg != 0)
+    {
+#ifdef DEBUG_I00AFUNC
+      fprintf (stderr, "%011lo %011lo\n", pseg, size);
+#endif
+      stkl = stkl - pseg;
+      ssptr = (struct stack_segment_linkage *) stkl;
+      size = ssptr->sssize;
+      pseg = ssptr->sspseg;
+      result += size;
+    }
+  return (result);
+}
+
+#endif /* not CRAY2 */
+#endif /* CRAY */
+
+#endif /* no alloca */
+#endif /* not GCC version 2 */
diff --git a/lib/getline.c b/lib/getline.c
new file mode 100644
index 0000000..defaeda
--- /dev/null
+++ b/lib/getline.c
@@ -0,0 +1,110 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#undef _GNU_SOURCE
+
+#include <sys/types.h>
+#include <stdio.h>
+
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#else
+# include <string.h>
+#endif /* HAVE_STRINGS_H */
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif /* HAVE_STDLIB_H */
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#include <limits.h>
+#include <errno.h>
+
+/* Read up to (and including) a '\n' from STREAM into *LINEPTR
+   (and null-terminate it).  *LINEPTR is a pointer returned from malloc (or
+   NULL), pointing to *N characters of space.  It is realloc'd as
+   necessary, and *LINEPTR and *N are updated to match.
+
+   Returns the number of characters stored (not including the null
+   terminator).  Returns (size_t) -1, with errno set to EINVAL, if
+   LINEPTR or N is null; and (size_t) -1 if STREAM already has its
+   error indicator set, if the initial buffer cannot be allocated, or
+   if end of file or an error is met before any character is read.
+   If input ends, or the buffer cannot be enlarged, after some
+   characters have been read, the partial line is returned.
+
+   On DOS-like systems a "\r\n" line ending is stored as "\n".  */
+
+size_t
+getline (lineptr, n, stream)
+     char **lineptr;
+     size_t *n;
+     FILE *stream;
+{
+  char *line, *p;
+  long size, copy;
+
+  if (lineptr == NULL || n == NULL)
+    {
+      errno = EINVAL;
+      return (size_t) -1;
+    }
+
+  if (ferror (stream))
+    return (size_t) -1;
+
+  /* Make sure we have a line buffer to start with.  */
+  if (*lineptr == NULL || *n < 2) /* !seen and no buf yet need 2 chars.  */
+    {
+#ifndef MAX_CANON
+#define MAX_CANON 256
+#endif
+      if (!*lineptr)
+        line = (char *) malloc (MAX_CANON);
+      else
+        line = (char *) realloc (*lineptr, MAX_CANON);
+      if (line == NULL)
+        return (size_t) -1;
+      *lineptr = line;
+      *n = MAX_CANON;
+    }
+
+  line = *lineptr;
+  size = *n;
+
+  copy = size;
+  p = line;
+
+  while (1)
+    {
+      long len;
+
+      /* Fill the buffer, always keeping one byte free for the
+         terminating null.  */
+      while (--copy > 0)
+        {
+          register int c = getc (stream);
+          if (c == EOF)
+            goto lose;
+          else if ((*p++ = c) == '\n')
+            goto win;
+        }
+
+      /* Need to enlarge the line buffer.  */
+      len = p - line;
+      size *= 2;
+      line = (char *) realloc (line, size);
+      if (line == NULL)
+        goto lose;      /* *LINEPTR and P still refer to the old buffer.  */
+      *lineptr = line;
+      *n = size;
+      p = line + len;
+      copy = size - len;
+    }
+
+ lose:
+  if (p == *lineptr)
+    return (size_t) -1;
+
+  /* Return a partial line since we got an error in the middle.  */
+ win:
+#if defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__) || defined(MSDOS) || defined(__EMX__)
+  /* Collapse a trailing "\r\n" into "\n".  The line must really end in
+     a newline: a partial line reaches this point too, and a '\r' that
+     merely precedes its last character is data, not a line ending.  */
+  if (p - *lineptr >= 2 && p[-1] == '\n' && p[-2] == '\r')
+    p[-2] = p[-1], --p;
+#endif
+  *p = '\0';
+  return p - *lineptr;
+}
diff --git a/lib/getopt.c b/lib/getopt.c
new file mode 100644
index 0000000..395d597
--- /dev/null
+++ b/lib/getopt.c
@@ -0,0 +1,1049 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to drepper@gnu.org
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98
+ Free Software Foundation, Inc.
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@gnu.org.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+# ifndef const
+# define const
+# endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+# define ELIDE_CODE
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+# include <stdlib.h>
+# include <unistd.h>
+#endif /* GNU C library. */
+
+#ifdef VMS
+# include <unixlib.h>
+# if HAVE_STRING_H - 0
+# include <string.h>
+# endif
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+ When compiling libc, the _ macro is predefined. */
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# define _(msgid) gettext (msgid)
+# else
+# define _(msgid) (msgid)
+# endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* 1003.2 says this must be 1 before any call. */
+int optind = 1;
+
+/* Formerly, initialization of getopt depended on optind==0, which
+ causes problems with re-calling getopt as programs generally don't
+ know that. */
+
+int __getopt_initialized = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+# include <string.h>
+# define my_index strchr
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+#ifndef strncmp
+extern int strncmp ();
+#endif
+
+/* Return a pointer to the first occurrence of CHR in the string STR,
+   or a null pointer if CHR does not occur in it.  The terminating
+   null character is never matched.  */
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *scan;
+
+  for (scan = str; *scan != '\0'; scan++)
+    if (*scan == chr)
+      return (char *) scan;
+
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+# if (!defined __STDC__ || !__STDC__) && !defined strlen
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+# endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+ indicating ARGV elements that should not be considered arguments. */
+
+/* Defined in getopt_init.c */
+extern char *__getopt_nonoption_flags;
+
+static int nonoption_flags_max_len;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* To make sure the environment variable bash 2.0 puts in the environment
+ is valid for the getopt call, we must make sure that the ARGV passed
+ to getopt is the one passed to the process. */
+static void
+__attribute__ ((unused))
+store_args_and_env (int argc, char *const *argv)
+{
+  /* XXX This is not a good solution: only the argument count and the
+     vector pointer are remembered.  It would be better to copy the
+     arguments so they can be compared later, but malloc(3) must not
+     be used here.  */
+  original_argv = argv;
+  original_argc = argc;
+}
+# ifdef text_set_element
+text_set_element (__libc_subinit, store_args_and_env);
+# endif /* text_set_element */
+
+# define SWAP_FLAGS(ch1, ch2) \
+ if (nonoption_flags_len > 0) \
+ { \
+ char __tmp = __getopt_nonoption_flags[ch1]; \
+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
+ __getopt_nonoption_flags[ch2] = __tmp; \
+ }
+#else /* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif /* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+#if defined __STDC__ && __STDC__
+static void exchange (char **);
+#endif
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* ARGV[lo, mid) holds the non-options skipped so far and
+     ARGV[mid, hi) the options scanned after them.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = optind;
+  char *saved;
+
+  /* Strategy: swap the shorter segment with the far end of the longer
+     one.  The shorter segment then sits in its final place, and the
+     longer one is left as two pieces that still need swapping, which
+     the next iteration takes care of.  */
+
+#ifdef _LIBC
+  /* The `__getopt_nonoption_flags' string is permuted in step with
+     ARGV, so it must be long enough to be indexed up to HI.  */
+  if (nonoption_flags_len > 0 && hi >= nonoption_flags_max_len)
+    {
+      /* The caller has presented more arguments than the string
+         covers; grow it, zero-filling the new tail.  */
+      char *grown = malloc (hi + 1);
+      if (grown != NULL)
+        {
+          memset (__mempcpy (grown, __getopt_nonoption_flags,
+                             nonoption_flags_max_len),
+                  '\0', hi + 1 - nonoption_flags_max_len);
+          nonoption_flags_max_len = hi + 1;
+          __getopt_nonoption_flags = grown;
+        }
+      else
+        /* Out of memory: stop consulting the flags altogether.  */
+        nonoption_flags_len = nonoption_flags_max_len = 0;
+    }
+#endif
+
+  while (hi > mid && mid > lo)
+    {
+      int lower_len = mid - lo;
+      int upper_len = hi - mid;
+      int k;
+
+      if (upper_len > lower_len)
+        {
+          /* The lower segment is the short one: trade it with the
+             last LOWER_LEN slots of the upper segment.  */
+          int dest = hi - lower_len;
+
+          for (k = 0; k < lower_len; k++)
+            {
+              saved = argv[lo + k];
+              argv[lo + k] = argv[dest + k];
+              argv[dest + k] = saved;
+              SWAP_FLAGS (lo + k, dest + k);
+            }
+          /* Those slots are final; leave them out from now on.  */
+          hi = dest;
+        }
+      else
+        {
+          /* The upper segment is the short one: trade it with the
+             first UPPER_LEN slots of the lower segment.  */
+          for (k = 0; k < upper_len; k++)
+            {
+              saved = argv[lo + k];
+              argv[lo + k] = argv[mid + k];
+              argv[mid + k] = saved;
+              SWAP_FLAGS (lo + k, mid + k);
+            }
+          /* Those slots are final; leave them out from now on.  */
+          lo += upper_len;
+        }
+    }
+
+  /* Record where the non-options ended up.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made. */
+
+#if defined __STDC__ && __STDC__
+static const char *_getopt_initialize (int, char *const *, const char *);
+#endif
+static const char *
+_getopt_initialize (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Scanning starts at the current `optind' and nothing has been
+     skipped yet, so the range of skipped non-options is empty.  */
+  last_nonopt = optind;
+  first_nonopt = optind;
+
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Choose how options and non-options are ordered.  A leading `-' or
+     `+' in OPTSTRING decides and is consumed; otherwise the
+     POSIXLY_CORRECT environment variable decides.  */
+  switch (optstring[0])
+    {
+    case '-':
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+      break;
+
+    case '+':
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+      break;
+
+    default:
+      if (posixly_correct != NULL)
+        ordering = REQUIRE_ORDER;
+      else
+        ordering = PERMUTE;
+      break;
+    }
+
+#ifdef _LIBC
+  /* The shell-provided flags are honoured only outside POSIX mode and
+     only when ARGC/ARGV are the ones recorded at process start.  */
+  if (posixly_correct != NULL
+      || argc != original_argc || argv != original_argv)
+    nonoption_flags_len = 0;
+  else
+    {
+      if (nonoption_flags_max_len == 0)
+        {
+          if (__getopt_nonoption_flags != NULL
+              && __getopt_nonoption_flags[0] != '\0')
+            {
+              /* Take a private copy, padded with NULs so that it has
+                 at least ARGC entries.  */
+              const char *shell_flags = __getopt_nonoption_flags;
+              int shell_len = strlen (shell_flags);
+
+              nonoption_flags_max_len = shell_len < argc ? argc : shell_len;
+              __getopt_nonoption_flags =
+                (char *) malloc (nonoption_flags_max_len);
+              if (__getopt_nonoption_flags != NULL)
+                memset (__mempcpy (__getopt_nonoption_flags,
+                                   shell_flags, shell_len),
+                        '\0', nonoption_flags_max_len - shell_len);
+              else
+                nonoption_flags_max_len = -1;
+            }
+          else
+            /* No flags were provided.  */
+            nonoption_flags_max_len = -1;
+        }
+      nonoption_flags_len = nonoption_flags_max_len;
+    }
+#endif
+
+  /* OPTSTRING with any ordering prefix removed.  */
+  return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns -1.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+/* Core scanner behind getopt, getopt_long and getopt_long_only.
+
+   Returns the next short option character; for a matched long option
+   its `val' (or 0 when the value was stored through `flag'); 1 for a
+   non-option element that is handed back in `optarg'; '?' for an
+   unrecognized or ambiguous option; ':' instead of '?' for a missing
+   argument when OPTSTRING starts with ':'; and -1 when scanning is
+   finished.  */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+{
+  optarg = NULL;
+
+  if (optind == 0 || !__getopt_initialized)
+    {
+      if (optind == 0)
+        optind = 1;     /* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring);
+      __getopt_initialized = 1;
+    }
+  else if (optstring[0] == '-' || optstring[0] == '+')
+    /* The initializing call strips the ordering prefix from its own
+       copy of OPTSTRING only.  Strip it on later calls as well, so the
+       `optstring[0] == ':'' tests below look at the right character and
+       the prefix is never mistaken for an option letter.  */
+    optstring++;
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this code is part of the GNU libc.  */
+#ifdef _LIBC
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
+                      || (optind < nonoption_flags_len \
+                          && __getopt_nonoption_flags[optind] == '1'))
+#else
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+         moved back by the user (who may also have changed the arguments).  */
+      if (last_nonopt > optind)
+        last_nonopt = optind;
+      if (first_nonopt > optind)
+        first_nonopt = optind;
+
+      if (ordering == PERMUTE)
+        {
+          /* If we have just processed some options following some non-options,
+             exchange them so that the options come first.  */
+
+          if (first_nonopt != last_nonopt && last_nonopt != optind)
+            exchange ((char **) argv);
+          else if (last_nonopt != optind)
+            first_nonopt = optind;
+
+          /* Skip any additional non-options
+             and extend the range of non-options previously skipped.  */
+
+          while (optind < argc && NONOPTION_P)
+            optind++;
+          last_nonopt = optind;
+        }
+
+      /* The special ARGV-element `--' means premature end of options.
+         Skip it like a null option,
+         then exchange with previous non-options as if it were an option,
+         then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+        {
+          optind++;
+
+          if (first_nonopt != last_nonopt && last_nonopt != optind)
+            exchange ((char **) argv);
+          else if (first_nonopt == last_nonopt)
+            first_nonopt = optind;
+          last_nonopt = argc;
+
+          optind = argc;
+        }
+
+      /* If we have done all the ARGV-elements, stop the scan
+         and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+        {
+          /* Set the next-arg-index to point at the non-options
+             that we previously skipped, so the caller will digest them.  */
+          if (first_nonopt != last_nonopt)
+            optind = first_nonopt;
+          return -1;
+        }
+
+      /* If we have come to a non-option and did not permute it,
+         either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+        {
+          if (ordering == REQUIRE_ORDER)
+            return -1;
+          optarg = argv[optind++];
+          return 1;
+        }
+
+      /* We have found another option-ARGV-element.
+         Skip the initial punctuation.  */
+
+      nextchar = (argv[optind] + 1
+                  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+          || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      /* The option name ends at `=' or at the end of the element.  */
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+        /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+         or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+        if (!strncmp (p->name, nextchar, nameend - nextchar))
+          {
+            if ((unsigned int) (nameend - nextchar)
+                == (unsigned int) strlen (p->name))
+              {
+                /* Exact match found.  */
+                pfound = p;
+                indfound = option_index;
+                exact = 1;
+                break;
+              }
+            else if (pfound == NULL)
+              {
+                /* First nonexact match found.  */
+                pfound = p;
+                indfound = option_index;
+              }
+            else
+              /* Second or later nonexact match found.  */
+              ambig = 1;
+          }
+
+      if (ambig && !exact)
+        {
+          if (opterr)
+            fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+                     argv[0], argv[optind]);
+          nextchar += strlen (nextchar);
+          optind++;
+          optopt = 0;
+          return '?';
+        }
+
+      if (pfound != NULL)
+        {
+          option_index = indfound;
+          optind++;
+          if (*nameend)
+            {
+              /* Don't test has_arg with >, because some C compilers don't
+                 allow it to be used on enums.  */
+              if (pfound->has_arg)
+                optarg = nameend + 1;
+              else
+                {
+                  if (opterr)
+                    {
+                      if (argv[optind - 1][1] == '-')
+                        /* --option */
+                        fprintf (stderr,
+                                 _("%s: option `--%s' doesn't allow an argument\n"),
+                                 argv[0], pfound->name);
+                      else
+                        /* +option or -option */
+                        fprintf (stderr,
+                                 _("%s: option `%c%s' doesn't allow an argument\n"),
+                                 argv[0], argv[optind - 1][0], pfound->name);
+                    }
+
+                  nextchar += strlen (nextchar);
+
+                  optopt = pfound->val;
+                  return '?';
+                }
+            }
+          else if (pfound->has_arg == 1)
+            {
+              if (optind < argc)
+                optarg = argv[optind++];
+              else
+                {
+                  if (opterr)
+                    fprintf (stderr,
+                             _("%s: option `%s' requires an argument\n"),
+                             argv[0], argv[optind - 1]);
+                  nextchar += strlen (nextchar);
+                  optopt = pfound->val;
+                  return optstring[0] == ':' ? ':' : '?';
+                }
+            }
+          nextchar += strlen (nextchar);
+          if (longind != NULL)
+            *longind = option_index;
+          if (pfound->flag)
+            {
+              *(pfound->flag) = pfound->val;
+              return 0;
+            }
+          return pfound->val;
+        }
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+         or the option starts with '--' or is not a valid short
+         option, then it's an error.
+         Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+          || my_index (optstring, *nextchar) == NULL)
+        {
+          if (opterr)
+            {
+              if (argv[optind][1] == '-')
+                /* --option */
+                fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+                         argv[0], nextchar);
+              else
+                /* +option or -option */
+                fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+                         argv[0], argv[optind][0], nextchar);
+            }
+          nextchar = (char *) "";
+          optind++;
+          optopt = 0;
+          return '?';
+        }
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    if (temp == NULL || c == ':')
+      {
+        if (opterr)
+          {
+            if (posixly_correct)
+              /* 1003.2 specifies the format of this message.  */
+              fprintf (stderr, _("%s: illegal option -- %c\n"),
+                       argv[0], c);
+            else
+              fprintf (stderr, _("%s: invalid option -- %c\n"),
+                       argv[0], c);
+          }
+        optopt = c;
+        return '?';
+      }
+    /* Convenience.  Treat POSIX -W foo same as long option --foo */
+    if (temp[0] == 'W' && temp[1] == ';')
+      {
+        char *nameend;
+        const struct option *p;
+        const struct option *pfound = NULL;
+        int exact = 0;
+        int ambig = 0;
+        int indfound = 0;
+        int option_index;
+
+        /* This is an option that requires an argument.  */
+        if (*nextchar != '\0')
+          {
+            optarg = nextchar;
+            /* If we end this ARGV-element by taking the rest as an arg,
+               we must advance to the next element now.  */
+            optind++;
+          }
+        else if (optind == argc)
+          {
+            if (opterr)
+              {
+                /* 1003.2 specifies the format of this message.  */
+                fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+                         argv[0], c);
+              }
+            optopt = c;
+            if (optstring[0] == ':')
+              c = ':';
+            else
+              c = '?';
+            return c;
+          }
+        else
+          /* We already incremented `optind' once;
+             increment it again when taking next ARGV-elt as argument.  */
+          optarg = argv[optind++];
+
+        /* optarg is now the argument, see if it's in the
+           table of longopts.  */
+
+        for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+          /* Do nothing.  */ ;
+
+        /* Test all long options for either exact match
+           or abbreviated matches.  Plain getopt passes no table at all;
+           in that case nothing can match and `W' is handed to the
+           application below with its argument in `optarg'.  */
+        if (longopts != NULL)
+          for (p = longopts, option_index = 0; p->name; p++, option_index++)
+            if (!strncmp (p->name, nextchar, nameend - nextchar))
+              {
+                if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+                  {
+                    /* Exact match found.  */
+                    pfound = p;
+                    indfound = option_index;
+                    exact = 1;
+                    break;
+                  }
+                else if (pfound == NULL)
+                  {
+                    /* First nonexact match found.  */
+                    pfound = p;
+                    indfound = option_index;
+                  }
+                else
+                  /* Second or later nonexact match found.  */
+                  ambig = 1;
+              }
+        if (ambig && !exact)
+          {
+            /* `optind' already points past the -W argument, so report
+               the argument itself (ARGV[optind] may be the terminating
+               null pointer) and do not advance `optind' any further.  */
+            if (opterr)
+              fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+                       argv[0], optarg);
+            nextchar += strlen (nextchar);
+            return '?';
+          }
+        if (pfound != NULL)
+          {
+            option_index = indfound;
+            if (*nameend)
+              {
+                /* Don't test has_arg with >, because some C compilers don't
+                   allow it to be used on enums.  */
+                if (pfound->has_arg)
+                  optarg = nameend + 1;
+                else
+                  {
+                    if (opterr)
+                      fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+                               argv[0], pfound->name);
+
+                    nextchar += strlen (nextchar);
+                    return '?';
+                  }
+              }
+            else if (pfound->has_arg == 1)
+              {
+                if (optind < argc)
+                  optarg = argv[optind++];
+                else
+                  {
+                    if (opterr)
+                      fprintf (stderr,
+                               _("%s: option `%s' requires an argument\n"),
+                               argv[0], argv[optind - 1]);
+                    nextchar += strlen (nextchar);
+                    return optstring[0] == ':' ? ':' : '?';
+                  }
+              }
+            nextchar += strlen (nextchar);
+            if (longind != NULL)
+              *longind = option_index;
+            if (pfound->flag)
+              {
+                *(pfound->flag) = pfound->val;
+                return 0;
+              }
+            return pfound->val;
+          }
+        nextchar = NULL;
+        return 'W';     /* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+        if (temp[2] == ':')
+          {
+            /* This is an option that accepts an argument optionally.  */
+            if (*nextchar != '\0')
+              {
+                optarg = nextchar;
+                optind++;
+              }
+            else
+              optarg = NULL;
+            nextchar = NULL;
+          }
+        else
+          {
+            /* This is an option that requires an argument.  */
+            if (*nextchar != '\0')
+              {
+                optarg = nextchar;
+                /* If we end this ARGV-element by taking the rest as an arg,
+                   we must advance to the next element now.  */
+                optind++;
+              }
+            else if (optind == argc)
+              {
+                if (opterr)
+                  {
+                    /* 1003.2 specifies the format of this message.  */
+                    fprintf (stderr,
+                             _("%s: option requires an argument -- %c\n"),
+                             argv[0], c);
+                  }
+                optopt = c;
+                if (optstring[0] == ':')
+                  c = ':';
+                else
+                  c = '?';
+              }
+            else
+              /* We already incremented `optind' once;
+                 increment it again when taking next ARGV-elt as argument.  */
+              optarg = argv[optind++];
+            nextchar = NULL;
+          }
+      }
+    return c;
+  }
+}
+
+/* Classic short-option-only entry point: scan ARGV for the option
+   characters listed in OPTSTRING, with long options disabled.  */
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* No long-option table and nowhere to report a long-option index.
+     The null pointers are given explicit types because the callee may
+     be declared without a prototype.  */
+  const struct option *no_longopts = (const struct option *) 0;
+  int *no_longind = (int *) 0;
+
+  return _getopt_internal (argc, argv, optstring,
+                           no_longopts, no_longind, 0);
+}
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse the command line with getopt and print each
+   option (and option argument) that is found, followed by any
+   remaining non-option ARGV-elements.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* Index of the ARGV-element the next option will come from;
+         `optind' is still 0 before the first call.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        /* `d:' is listed in the option string above, so it needs a
+           case of its own; otherwise a valid -d falls through to the
+           "?? getopt returned ..." report.  */
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          /* getopt has already reported the problem.  */
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/getopt.h b/lib/getopt.h
new file mode 100644
index 0000000..fb30719
--- /dev/null
+++ b/lib/getopt.h
@@ -0,0 +1,133 @@
+/* Declarations for getopt.
+ Copyright (C) 1989,90,91,92,93,94,96,97 Free Software Foundation, Inc.
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@gnu.org.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+#if defined (__STDC__) && __STDC__
+ const char *name; /* Name of the long option; zero in the terminating element. */
+#else
+ char *name; /* Name of the long option; zero in the terminating element. */
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg; /* One of no_argument, required_argument, optional_argument. */
+ int *flag; /* If not NULL, variable that is set to `val' when the option is found. */
+ int val; /* Value stored through `flag', or returned by `getopt' when `flag' is zero. */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+#if defined (__STDC__) && __STDC__
+#ifdef __GNU_LIBRARY__
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* getopt.h */
diff --git a/lib/getopt1.c b/lib/getopt1.c
new file mode 100644
index 0000000..ff25737
--- /dev/null
+++ b/lib/getopt1.c
@@ -0,0 +1,190 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
+ Free Software Foundation, Inc.
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@gnu.org.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "getopt.h"
+
+#if !defined __STDC__ || !__STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Scan ARGV for short options listed in OPTIONS and for long options
+   described by LONG_OPTIONS.  Everything is delegated to
+   _getopt_internal with long_only switched off.  */
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                           long_only);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
+ but does match a short option, it is parsed as a short option
+ instead. */
+
+/* Same delegation as getopt_long, but with long_only switched on so
+   that a single `-' may also introduce a long option.  */
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int long_only = 1;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                           long_only);
+}
+
+
+#endif /* Not ELIDE_CODE. */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver: parse the command line with getopt_long and print
+   every short or long option found, followed by any remaining
+   non-option ARGV-elements.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  /* Long options recognized by the driver.  All of them have a zero
+     `flag' and a zero `val', so getopt_long returns 0 for each.  */
+  static struct option long_options[] =
+  {
+    {"add", 1, 0, 0},
+    {"append", 0, 0, 0},
+    {"delete", 1, 0, 0},
+    {"verbose", 0, 0, 0},
+    {"create", 0, 0, 0},
+    {"file", 1, 0, 0},
+    {0, 0, 0, 0}
+  };
+  int digit_optind = 0;
+  int c;
+
+  for (;;)
+    {
+      /* Index of the ARGV-element the next option will come from;
+         `optind' is still 0 before the first call.  */
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                       long_options, &option_index);
+      if (c == -1)
+        break;
+
+      if (c == 0)
+        {
+          /* A long option: report its name and argument, if any.  */
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+        }
+      else if (c >= '0' && c <= '9')
+        {
+          /* Digit options are expected to share one ARGV-element.  */
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+        }
+      else
+        switch (c)
+          {
+          case 'a':
+            printf ("option a\n");
+            break;
+
+          case 'b':
+            printf ("option b\n");
+            break;
+
+          case 'c':
+            printf ("option c with value `%s'\n", optarg);
+            break;
+
+          case 'd':
+            printf ("option d with value `%s'\n", optarg);
+            break;
+
+          case '?':
+            /* getopt_long has already reported the problem.  */
+            break;
+
+          default:
+            printf ("?? getopt returned character code 0%o ??\n", c);
+          }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      for (; optind < argc; optind++)
+        printf ("%s ", argv[optind]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
diff --git a/lib/memchr.c b/lib/memchr.c
new file mode 100644
index 0000000..f48388f
--- /dev/null
+++ b/lib/memchr.c
@@ -0,0 +1,200 @@
+/* Copyright (C) 1991, 1993, 1996, 1997 Free Software Foundation, Inc.
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@gnu.org.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#undef __ptr_t
+#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
+# define __ptr_t void *
+#else /* Not C++ or ANSI C. */
+# define __ptr_t char *
+#endif /* C++ or ANSI C. */
+
+#if defined (_LIBC)
+# include <string.h>
+#endif
+
+#if defined (HAVE_LIMITS_H) || defined (_LIBC)
+# include <limits.h>
+#endif
+
+#define LONG_MAX_32_BITS 2147483647
+
+#ifndef LONG_MAX
+#define LONG_MAX LONG_MAX_32_BITS
+#endif
+
+#include <sys/types.h>
+
+#undef memchr
+
+
+/* Search no more than N bytes of S for C (converted to unsigned char).
+   Returns a pointer to the first matching byte, or a null pointer when
+   none of the N bytes matches.
+
+   The scan proceeds in three phases: single bytes until the pointer is
+   aligned for `unsigned long', then whole longwords, then the trailing
+   bytes one at a time.
+
+   All word-size dependent values are derived from sizeof (longword)
+   rather than from LONG_MAX.  LONG_MAX falls back to the 32-bit value
+   when <limits.h> was not included, which on a host with 8-byte longs
+   used to leave the upper half of every longword unexamined.  */
+__ptr_t
+memchr (s, c, n)
+     const __ptr_t s;
+     int c;
+     size_t n;
+{
+  const unsigned char *char_ptr;
+  const unsigned long int *longword_ptr;
+  unsigned long int longword, magic_bits, charmask;
+
+  c = (unsigned char) c;
+
+  /* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = (const unsigned char *) s;
+       n > 0 && ((unsigned long int) char_ptr
+		 & (sizeof (longword) - 1)) != 0;
+       --n, ++char_ptr)
+    if (*char_ptr == c)
+      return (__ptr_t) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to longwords of other sizes.  */
+
+  longword_ptr = (unsigned long int *) char_ptr;
+
+  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
+     the "holes."  Note that there is a hole just to the left of
+     each byte, with an extra at the end:
+
+     bits:  01111110 11111110 11111110 11111111
+     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+     The 1-bits make sure that carries propagate to the next 0-bit.
+     The 0-bits provide holes for carries to fall into.
+
+     Build the pattern for the real width of `unsigned long':
+     all-ones / 0xff gives 0x01 in every byte, * 0xfe gives 0xfe in every
+     byte, the shift pair clears the top bit and `| 1' sets bit 0.  That
+     is 0x7efefeff for 4 bytes and 0x7efefefefefefeff for 8 bytes.  */
+  magic_bits = -1;
+  magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1;
+
+  /* Set up a longword, each of whose bytes is C.  The 32-bit shift is
+     split in two so that it is never an over-wide shift on hosts whose
+     longword has only 32 bits.  */
+  charmask = c | (c << 8);
+  charmask |= charmask << 16;
+  if (sizeof (longword) > 4)
+    charmask |= (charmask << 16) << 16;
+
+  /* Instead of the traditional loop which tests each character,
+     we will test a longword at a time.  The tricky part is testing
+     if *any of the* bytes in the longword in question are zero.  */
+  while (n >= sizeof (longword))
+    {
+      /* We tentatively exit the loop if adding MAGIC_BITS to
+	 LONGWORD fails to change any of the hole bits of LONGWORD.
+
+	 1) Is this safe?  Will it catch all the zero bytes?
+	 Suppose there is a byte with all zeros.  Any carry bits
+	 propagating from its left will fall into the hole at its
+	 least significant bit and stop.  Since there will be no
+	 carry from its most significant bit, the LSB of the
+	 byte to the left will be unchanged, and the zero will be
+	 detected.
+
+	 2) Is this worthwhile?  Will it ignore everything except
+	 zero bytes?  Suppose every byte of LONGWORD has a bit set
+	 somewhere.  There will be a carry into bit 8.  If bit 8
+	 is set, this will carry into bit 16.  If bit 8 is clear,
+	 one of bits 9-15 must be set, so there will be a carry
+	 into bit 16.  Similarly, there will be a carry into bit
+	 24.  If one of bits 24-30 is set, there will be a carry
+	 into bit 31, so all of the hole bits will be changed.
+
+	 The one misfire occurs when bits 24-30 are clear and bit
+	 31 is set; in this case, the hole at bit 31 is not
+	 changed.  If we had access to the processor carry flag,
+	 we could close this loophole by putting the fourth hole
+	 at bit 32!
+
+	 So it ignores everything except 128's, when they're aligned
+	 properly.
+
+	 3) But wait!  Aren't we looking for C, not zero?
+	 Good point.  So what we do is XOR LONGWORD with a longword,
+	 each of whose bytes is C.  This turns each byte that is C
+	 into a zero.  */
+
+      longword = *longword_ptr++ ^ charmask;
+
+      /* Add MAGIC_BITS to LONGWORD.  */
+      if ((((longword + magic_bits)
+
+	    /* Set those bits that were unchanged by the addition.  */
+	    ^ ~longword)
+
+	   /* Look at only the hole bits.  If any of the hole bits
+	      are unchanged, most likely one of the bytes was a
+	      zero.  */
+	   & ~magic_bits) != 0)
+	{
+	  /* Which of the bytes was C?  If none of them were, it was
+	     a misfire; continue the search.  Every byte of the longword
+	     is examined, whatever its width.  */
+
+	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
+	  size_t i;
+
+	  for (i = 0; i < sizeof (longword); i++)
+	    if (cp[i] == c)
+	      return (__ptr_t) &cp[i];
+	}
+
+      n -= sizeof (longword);
+    }
+
+  /* Fewer than sizeof (longword) bytes remain; finish bytewise.  */
+  char_ptr = (const unsigned char *) longword_ptr;
+
+  while (n-- > 0)
+    {
+      if (*char_ptr == c)
+	return (__ptr_t) char_ptr;
+      else
+	++char_ptr;
+    }
+
+  return 0;
+}
diff --git a/lib/memcmp.c b/lib/memcmp.c
new file mode 100644
index 0000000..ace5d40
--- /dev/null
+++ b/lib/memcmp.c
@@ -0,0 +1,396 @@
+/* Copyright (C) 1991, 1993, 1995, 1997, 1998 Free Software Foundation, Inc.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ NOTE: The canonical source of this file is maintained with the GNU C Library.
+ Bugs can be reported to bug-glibc@gnu.org.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#undef __ptr_t
+#if defined __cplusplus || (defined __STDC__ && __STDC__)
+# define __ptr_t void *
+#else /* Not C++ or ANSI C. */
+# undef const
+# define const
+# define __ptr_t char *
+#endif /* C++ or ANSI C. */
+
+#ifndef __P
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define __P(args) args
+# else
+# define __P(args) ()
+# endif /* GCC. */
+#endif /* Not __P. */
+
+#if defined HAVE_STRING_H || defined _LIBC
+# include <string.h>
+#endif
+
+#undef memcmp
+
+#ifdef _LIBC
+
+# include <memcopy.h>
+# include <endian.h>
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define WORDS_BIGENDIAN
+# endif
+
+#else /* Not in the GNU C library. */
+
+# include <sys/types.h>
+
+/* Type to use for aligned memory operations.
+ This should normally be the biggest type supported by a single load
+ and store. Must be an unsigned type. */
+# define op_t unsigned long int
+# define OPSIZ (sizeof(op_t))
+
+/* Threshold value for when to enter the unrolled loops. */
+# define OP_T_THRES 16
+
+/* Type to use for unaligned operations. */
+typedef unsigned char byte;
+
+# ifndef WORDS_BIGENDIAN
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
+# else
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
+# endif
+
+#endif /* In the GNU C library. */
+
+#ifdef WORDS_BIGENDIAN
+# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
+#else
+# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
+#endif
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */
+
+/* The strategy of this memcmp is:
+
+ 1. Compare bytes until one of the block pointers is aligned.
+
+ 2. Compare using memcmp_common_alignment or
+ memcmp_not_common_alignment, regarding the alignment of the other
+ block after the initial byte operations. The maximum number of
+ full words (of type op_t) are compared in this way.
+
+ 3. Compare the few remaining bytes. */
+
+#ifndef WORDS_BIGENDIAN
+/* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
+ A and B are known to be different.
+ This is needed only on little-endian machines. */
+
+static int memcmp_bytes __P((op_t, op_t));
+
+# ifdef __GNUC__
+__inline
+# endif
+static int
+memcmp_bytes (a, b)
+     op_t a, b;
+{
+  /* Addresses of the two by-value copies; they are walked one byte at a
+     time, i.e. in the order the bytes sit in memory.  */
+  long int pos_a = (long int) &a;
+  long int pos_b = (long int) &b;
+  op_t byte_a, byte_b;
+
+  /* A and B are known to differ, so the scan stops at the first
+     mismatching byte.  */
+  for (;;)
+    {
+      byte_a = *(byte *) pos_a;
+      byte_b = *(byte *) pos_b;
+      if (byte_a != byte_b)
+	break;
+      pos_a += 1;
+      pos_b += 1;
+    }
+
+  /* The sign of the difference of the first differing bytes is the
+     result.  */
+  return byte_a - byte_b;
+}
+#endif
+
+static int memcmp_common_alignment __P((long, long, size_t));
+
+/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
+ objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
+ memory operations on `op_t's. */
+#ifdef __GNUC__
+__inline
+#endif
+static int
+memcmp_common_alignment (srcp1, srcp2, len)
+ long int srcp1;
+ long int srcp2;
+ size_t len;
+{
+ /* Two pairs of words are kept in flight: while one pair (a0/b0 or
+ a1/b1) is being compared, the other pair has already been loaded. */
+ op_t a0, a1;
+ op_t b0, b1;
+
+ /* Enter the 4-way unrolled loop at the step that matches LEN % 4.
+ Each case loads the first pair of words, biases SRCP1/SRCP2 so that
+ the fixed indices [0]..[3] used after the entry label address the
+ next unread words, and adjusts LEN to a multiple of 4 so that the
+ loop's `len -= 4' ends exactly at zero. */
+ switch (len % 4)
+ {
+ default: /* Avoid warning about uninitialized local variables. */
+ case 2:
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ srcp1 -= 2 * OPSIZ;
+ srcp2 -= 2 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp1)[0];
+ b1 = ((op_t *) srcp2)[0];
+ srcp1 -= OPSIZ;
+ srcp2 -= OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ /* LEN can only be zero here when the caller's threshold permits
+ fewer than one whole word after alignment. */
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return 0;
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ goto do3;
+ case 1:
+ a1 = ((op_t *) srcp1)[0];
+ b1 = ((op_t *) srcp2)[0];
+ srcp1 += OPSIZ;
+ srcp2 += OPSIZ;
+ len -= 1;
+ /* Only one word in total: skip the loop and compare it at do0. */
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ /* Fall through. */
+ }
+
+ /* Every step loads the next pair and then compares the pair loaded
+ by the previous step; the first mismatch decides the result. */
+ do
+ {
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+
+ do3:
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[1];
+ if (a0 != b0)
+ return CMP_LT_OR_GT (a0, b0);
+
+ do2:
+ a0 = ((op_t *) srcp1)[2];
+ b0 = ((op_t *) srcp2)[2];
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+
+ do1:
+ a1 = ((op_t *) srcp1)[3];
+ b1 = ((op_t *) srcp2)[3];
+ if (a0 != b0)
+ return CMP_LT_OR_GT (a0, b0);
+
+ srcp1 += 4 * OPSIZ;
+ srcp2 += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ /* Compare the last pair that was loaded but not yet checked. */
+ if (a1 != b1)
+ return CMP_LT_OR_GT (a1, b1);
+ return 0;
+}
+
+static int memcmp_not_common_alignment __P((long, long, size_t));
+
+/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
+ `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
+ operations on `op_t', but SRCP1 *should be unaligned*. */
+#ifdef __GNUC__
+__inline
+#endif
+static int
+memcmp_not_common_alignment (srcp1, srcp2, len)
+ long int srcp1;
+ long int srcp2;
+ size_t len;
+{
+ /* a0..a3 hold consecutive aligned words read from the (rounded-down)
+ SRCP1; b0..b3 hold words read from SRCP2. X is the word rebuilt
+ from two neighbouring a-words by MERGE for comparison against a
+ b-word. */
+ op_t a0, a1, a2, a3;
+ op_t b0, b1, b2, b3;
+ op_t x;
+ int shl, shr;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp1 to make it aligned for comparison. */
+
+ /* SHL is the offset of SRCP1 inside its op_t, in bits; SHR is the
+ complementary bit count. */
+ shl = 8 * (srcp1 % OPSIZ);
+ shr = 8 * OPSIZ - shl;
+
+ /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp1 &= -OPSIZ;
+
+ /* Enter the 4-way unrolled loop at the step that matches LEN % 4.
+ Each case preloads two words from SRCP1 and one from SRCP2, biases
+ the pointers so the fixed indices after the entry label address the
+ next unread words, and adjusts LEN to a multiple of 4. */
+ switch (len % 4)
+ {
+ default: /* Avoid warning about uninitialized local variables. */
+ case 2:
+ a1 = ((op_t *) srcp1)[0];
+ a2 = ((op_t *) srcp1)[1];
+ b2 = ((op_t *) srcp2)[0];
+ srcp1 -= 1 * OPSIZ;
+ srcp2 -= 2 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp1)[0];
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[0];
+ srcp2 -= 1 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return 0;
+ a3 = ((op_t *) srcp1)[0];
+ a0 = ((op_t *) srcp1)[1];
+ b0 = ((op_t *) srcp2)[0];
+ srcp1 += 1 * OPSIZ;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp1)[0];
+ a3 = ((op_t *) srcp1)[1];
+ b3 = ((op_t *) srcp2)[0];
+ srcp1 += 2 * OPSIZ;
+ srcp2 += 1 * OPSIZ;
+ len -= 1;
+ /* Only one word in total: skip the loop and compare it at do0. */
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ /* Fall through. */
+ }
+
+ /* Every step loads the next word from each source, then merges the
+ two a-words loaded earlier and compares the result with the b-word
+ loaded earlier; the first mismatch decides the result. */
+ do
+ {
+ a0 = ((op_t *) srcp1)[0];
+ b0 = ((op_t *) srcp2)[0];
+ x = MERGE(a2, shl, a3, shr);
+ if (x != b3)
+ return CMP_LT_OR_GT (x, b3);
+
+ do3:
+ a1 = ((op_t *) srcp1)[1];
+ b1 = ((op_t *) srcp2)[1];
+ x = MERGE(a3, shl, a0, shr);
+ if (x != b0)
+ return CMP_LT_OR_GT (x, b0);
+
+ do2:
+ a2 = ((op_t *) srcp1)[2];
+ b2 = ((op_t *) srcp2)[2];
+ x = MERGE(a0, shl, a1, shr);
+ if (x != b1)
+ return CMP_LT_OR_GT (x, b1);
+
+ do1:
+ a3 = ((op_t *) srcp1)[3];
+ b3 = ((op_t *) srcp2)[3];
+ x = MERGE(a1, shl, a2, shr);
+ if (x != b2)
+ return CMP_LT_OR_GT (x, b2);
+
+ srcp1 += 4 * OPSIZ;
+ srcp2 += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ /* Compare the last merged word that was loaded but not yet checked. */
+ x = MERGE(a2, shl, a3, shr);
+ if (x != b3)
+ return CMP_LT_OR_GT (x, b3);
+ return 0;
+}
+
+/* Compare the first LEN bytes of S1 and S2.  Returns zero when they are
+   equal, otherwise a nonzero value derived from the first differing
+   byte or word.  Blocks of at least OP_T_THRES bytes are compared
+   word-wise once S2 has been aligned; everything else is compared
+   byte by byte.  */
+int
+memcmp (s1, s2, len)
+     const __ptr_t s1;
+     const __ptr_t s2;
+     size_t len;
+{
+  long int srcp1 = (long int) s1;
+  long int srcp2 = (long int) s2;
+  op_t lhs, rhs;
+  op_t res;
+
+  if (len >= OP_T_THRES)
+    {
+      size_t whole;
+
+      /* There are at least some bytes to compare, so LEN need not be
+	 tested while stepping SRCP2 up to an `op_t' boundary.  */
+      for (; srcp2 % OPSIZ != 0; len -= 1)
+	{
+	  lhs = *(byte *) srcp1;
+	  rhs = *(byte *) srcp2;
+	  srcp1 += 1;
+	  srcp2 += 1;
+	  res = lhs - rhs;
+	  if (res != 0)
+	    return res;
+	}
+
+      /* SRCP2 is aligned now.  Whether SRCP1 happens to be aligned as
+	 well selects the plain word compare or the shifting one.  */
+      res = (srcp1 % OPSIZ == 0
+	     ? memcmp_common_alignment (srcp1, srcp2, len / OPSIZ)
+	     : memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ));
+      if (res != 0)
+	return res;
+
+      /* Step over the whole words just compared; between 0 and
+	 OPSIZ-1 bytes remain.  */
+      whole = len & -OPSIZ;
+      srcp1 += whole;
+      srcp2 += whole;
+      len %= OPSIZ;
+    }
+
+  /* Only a few bytes are left; compare them one at a time.  */
+  for (; len != 0; len -= 1)
+    {
+      lhs = *(byte *) srcp1;
+      rhs = *(byte *) srcp2;
+      srcp1 += 1;
+      srcp2 += 1;
+      res = lhs - rhs;
+      if (res != 0)
+	return res;
+    }
+
+  return 0;
+}
+
+#ifdef weak_alias
+# undef bcmp
+weak_alias (memcmp, bcmp)
+#endif
diff --git a/lib/memmove.c b/lib/memmove.c
new file mode 100644
index 0000000..46de02c
--- /dev/null
+++ b/lib/memmove.c
@@ -0,0 +1,76 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA. */
+
+/* Last ditch effort to support memmove: if user doesn't have
+ memmove or bcopy, we offer this sluggish implementation. */
+
+#include "config.h"
+#ifndef HAVE_MEMMOVE
+
+#include <sys/types.h>
+#ifdef HAVE_MEMORY_H
+# include <memory.h>
+#endif
+
+#ifndef VOID
+# define VOID void
+#endif
+
+/* Copy LEN bytes from SRC to DEST and return DEST.  The hand-written
+   fallback picks its copy direction from the relative order of the two
+   pointers, so overlapping regions are copied correctly.  */
+VOID *
+memmove(dest, src, len)
+     VOID *dest;
+     const VOID *src;
+     size_t len;
+{
+#ifdef HAVE_BCOPY
+  /* Let the system's bcopy do the work.  */
+  bcopy(src, dest, len);
+
+#else /*!HAVE_BCOPY*/
+  char *out = dest;
+  const char *in = src;
+
+# ifdef HAVE_MEMCPY
+  /* Regions that do not overlap are handed to the system's memcpy(),
+     in the hope that its implementation is better than the byte loops
+     below.  */
+  if ((out < in && out+len < in) || (in < out && in+len < out))
+    return memcpy(dest, src, len);
+# endif
+
+  /* Slow path: plain byte loops.  Upgrading the system's libraries is
+     the real fix for the performance of this code.  */
+  if (out < in)
+    {
+      /* Destination starts below the source: copy front to back.  */
+      for (; len > 0; len--)
+	*out++ = *in++;
+    }
+  else if (out != in)
+    {
+      /* Destination starts above the source: copy back to front.  */
+      out += len;
+      in += len;
+      for (; len > 0; len--)
+	*--out = *--in;
+    }
+#endif /*!HAVE_BCOPY*/
+
+  return dest;
+}
+
+#endif /*!HAVE_MEMMOVE*/
diff --git a/lib/mkstemp.c b/lib/mkstemp.c
new file mode 100644
index 0000000..5b00205
--- /dev/null
+++ b/lib/mkstemp.c
@@ -0,0 +1,70 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#else
+# include <string.h>
+#endif /* HAVE_STRINGS_H */
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif /* HAVE_STDLIB_H */
+
+#ifdef HAVE_SYS_FILE_H
+# include <sys/file.h>
+#endif /* HAVE_SYS_FILE_H */
+
+#ifdef HAVE_IO_H
+# include <io.h>
+#endif /* HAVE_IO_H */
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif /* HAVE_FCNTL_H */
+
+#include <limits.h>
+#include <errno.h>
+
+/* Create and open a unique temporary file whose name is derived from
+   TEMPLATE.  The last six characters of TEMPLATE must be XXXXXX; they
+   are overwritten in place with characters that make the name unique.
+
+   Returns a descriptor open for reading and writing on a file created
+   with mode 0600.  On failure returns -1 with errno set: EINVAL when
+   TEMPLATE does not end in XXXXXX, EEXIST when every candidate name is
+   already taken, and otherwise whatever open reported (for instance
+   ENOENT or EACCES).  */
+
+int
+mkstemp (template)
+     char *template;
+{
+  int i, j, n, fd;
+  char *data;
+
+  /* Check the length before forming the pointer, so that a template
+     shorter than six characters never yields a pointer in front of the
+     string.  */
+  if (strlen(template) < 6) {
+    errno = EINVAL;
+    return -1;
+  }
+  data = template + strlen(template) - 6;
+
+  for (n = 0; n <= 5; n++)
+    if (data[n] != 'X') {
+      errno = EINVAL;
+      return -1;
+    }
+
+  for (i = 0; i < INT_MAX; i++) {
+    j = i ^ 827714841;		/* Base 36 DOSSUX :-) */
+    for (n = 5; n >= 0; n--) {
+      data[n] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" [j % 36];
+      j /= 36;
+    }
+
+    fd = open (template, O_CREAT|O_EXCL|O_RDWR, 0600);
+    if (fd != -1)
+      return fd;
+
+    /* Only a name collision is worth another attempt.  Any other error
+       (missing directory, no permission, ...) would fail identically
+       for every candidate name, so report it right away instead of
+       grinding through INT_MAX attempts.  */
+    if (errno != EEXIST)
+      return -1;
+  }
+
+  errno = EEXIST;
+  return -1;
+}
diff --git a/lib/obstack.c b/lib/obstack.c
new file mode 100644
index 0000000..f67625d
--- /dev/null
+++ b/lib/obstack.c
@@ -0,0 +1,569 @@
+/* obstack.c - subroutines used implicitly by object stack macros -*- C -*-
+ Copyright (C) 1988,89,90,91,92,93,94,96,97 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "obstack.h"
+
+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
+ incremented whenever callers compiled using an old obstack.h can no
+ longer properly call the functions in this obstack.c. */
+#define OBSTACK_INTERFACE_VERSION 1
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself, and the installed library
+ supports the same library interface we do. This code is part of the GNU
+ C Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object
+ files, it is simpler to just do this in the source for each such file. */
+
+#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */
+#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1
+#include <gnu-versions.h>
+#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+
+#ifndef ELIDE_CODE
+
+
+#if defined (__STDC__) && __STDC__
+#define POINTER void *
+#else
+#define POINTER char *
+#endif
+
+/* Determine default alignment. */
+struct fooalign {char x; double d;};
+#define DEFAULT_ALIGNMENT \
+ ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0))
+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
+ But in fact it might be less smart and round addresses to as much as
+ DEFAULT_ROUNDING. So we prepare for it to do that. */
+union fooround {long x; double d;};
+#define DEFAULT_ROUNDING (sizeof (union fooround))
+
+#ifdef original_glibc_code
+/**//* When we copy a long block of data, this is the unit to do it with. */
+/**//* On some machines, copying successive ints does not work; */
+/**//* in such a case, redefine COPYING_UNIT to `long' (if that works) */
+/**//* or `char' as a last resort. */
+/**/#ifndef COPYING_UNIT
+/**/#define COPYING_UNIT int
+/**/#endif
+#endif
+
+/* The functions allocating more room by calling `obstack_chunk_alloc'
+ jump to the handler pointed to by `obstack_alloc_failed_handler'.
+ This variable by default points to the internal function
+ `print_and_abort'. */
+#if defined (__STDC__) && __STDC__
+static void print_and_abort (void);
+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
+#else
+static void print_and_abort ();
+void (*obstack_alloc_failed_handler) () = print_and_abort;
+#endif
+
+/* Exit value used when `print_and_abort' is used. */
+#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+int obstack_exit_failure = EXIT_FAILURE;
+
+/* The non-GNU-C macros copy the obstack into this global variable
+ to avoid multiple evaluation. */
+
+struct obstack *_obstack;
+
+/* Define a macro that either calls functions with the traditional malloc/free
+ calling interface, or calls functions with the mmalloc/mfree interface
+ (that adds an extra first argument), based on the state of use_extra_arg.
+ For free, do not use ?:, since some compilers, like the MIPS compilers,
+ do not allow (expr) ? void : void. */
+
+#if defined (__STDC__) && __STDC__
+#define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
+
+#define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
+ } while (0)
+#else
+#define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size)))
+
+#define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(void (*) ()) (h)->freefun) ((old_chunk)); \
+ } while (0)
+#endif
+
+
+/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
+ Objects start on multiples of ALIGNMENT (0 means use default).
+ CHUNKFUN is the function to use to allocate chunks,
+ and FREEFUN the function to free them.
+
+ Return nonzero if successful, zero if out of memory.
+ To recover from an out of memory error,
+ free up some memory, then call this again. */
+
+/* Set up obstack H with chunk size SIZE and object alignment ALIGNMENT
+   (zero selects the default for either), using CHUNKFUN / FREEFUN with
+   the plain one-argument calling interface, and allocate the first
+   chunk.  The allocation-failure handler is invoked when that chunk
+   cannot be obtained.  Returns 1.  */
+int
+_obstack_begin (h, size, alignment, chunkfun, freefun)
+     struct obstack *h;
+     int size;
+     int alignment;
+#if defined (__STDC__) && __STDC__
+     POINTER (*chunkfun) (long);
+     void (*freefun) (void *);
+#else
+     POINTER (*chunkfun) ();
+     void (*freefun) ();
+#endif
+{
+  register struct _obstack_chunk *first;	/* the initial chunk */
+
+  if (size == 0)
+    {
+      /* Default size is what GNU malloc can fit in a 4096-byte block.
+	 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+	 Use the values for range checking, because if range checking is
+	 off, the extra bytes won't be missed terribly, but if range
+	 checking is on and we used a larger request, a whole extra 4096
+	 bytes would be allocated.
+
+	 These numbers are irrelevant to the new GNU malloc.  I suspect
+	 it is less sensitive to the size of the request.  */
+      int header = (12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1);
+      int extra = ((header + 4 + DEFAULT_ROUNDING - 1)
+		   & ~(DEFAULT_ROUNDING - 1));
+      size = 4096 - extra;
+    }
+  if (alignment == 0)
+    alignment = DEFAULT_ALIGNMENT;
+
+  /* Record the configuration; this entry point never passes the extra
+     argument to the chunk functions.  */
+  h->use_extra_arg = 0;
+  h->alignment_mask = alignment - 1;
+  h->chunk_size = size;
+#if defined (__STDC__) && __STDC__
+  h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
+  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+#else
+  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
+  h->freefun = freefun;
+#endif
+
+  /* Obtain the first chunk and point the obstack at its contents.  */
+  first = CALL_CHUNKFUN (h, h -> chunk_size);
+  h->chunk = first;
+  if (!first)
+    (*obstack_alloc_failed_handler) ();
+  h->object_base = first->contents;
+  h->next_free = h->object_base;
+  first->limit = (char *) first + h->chunk_size;
+  h->chunk_limit = first->limit;
+  first->prev = 0;
+  /* The initial chunk now contains no empty object.  */
+  h->maybe_empty_object = 0;
+  h->alloc_failed = 0;
+  return 1;
+}
+
+/* Same set-up as _obstack_begin, except that CHUNKFUN and FREEFUN use
+   the two-argument calling interface: ARG is stored in the obstack and
+   passed as their first argument.  Returns 1.  */
+int
+_obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg)
+     struct obstack *h;
+     int size;
+     int alignment;
+#if defined (__STDC__) && __STDC__
+     POINTER (*chunkfun) (POINTER, long);
+     void (*freefun) (POINTER, POINTER);
+#else
+     POINTER (*chunkfun) ();
+     void (*freefun) ();
+#endif
+     POINTER arg;
+{
+  register struct _obstack_chunk *first;	/* the initial chunk */
+
+  if (size == 0)
+    {
+      /* Default size is what GNU malloc can fit in a 4096-byte block.
+	 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+	 Use the values for range checking, because if range checking is
+	 off, the extra bytes won't be missed terribly, but if range
+	 checking is on and we used a larger request, a whole extra 4096
+	 bytes would be allocated.
+
+	 These numbers are irrelevant to the new GNU malloc.  I suspect
+	 it is less sensitive to the size of the request.  */
+      int header = (12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1);
+      int extra = ((header + 4 + DEFAULT_ROUNDING - 1)
+		   & ~(DEFAULT_ROUNDING - 1));
+      size = 4096 - extra;
+    }
+  if (alignment == 0)
+    alignment = DEFAULT_ALIGNMENT;
+
+  /* Record the configuration, including the extra argument that the
+     chunk functions receive.  It must be in place before the first
+     chunk is requested below.  */
+  h->extra_arg = arg;
+  h->use_extra_arg = 1;
+  h->alignment_mask = alignment - 1;
+  h->chunk_size = size;
+#if defined(__STDC__) && __STDC__
+  h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
+  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+#else
+  h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun;
+  h->freefun = freefun;
+#endif
+
+  /* Obtain the first chunk and point the obstack at its contents.  */
+  first = CALL_CHUNKFUN (h, h -> chunk_size);
+  h->chunk = first;
+  if (!first)
+    (*obstack_alloc_failed_handler) ();
+  h->object_base = first->contents;
+  h->next_free = h->object_base;
+  first->limit = (char *) first + h->chunk_size;
+  h->chunk_limit = first->limit;
+  first->prev = 0;
+  /* The initial chunk now contains no empty object.  */
+  h->maybe_empty_object = 0;
+  h->alloc_failed = 0;
+  return 1;
+}
+
+/* Allocate a new current chunk for the obstack *H
+ on the assumption that LENGTH bytes need to be added
+ to the current object, or a new object of length LENGTH allocated.
+ Copies any partial object from the end of the old chunk
+ to the beginning of the new one. */
+
+/* Make a fresh chunk the current one for obstack H, large enough for the
+   object under construction plus LENGTH more bytes, and move that
+   partial object to the start of the fresh chunk.  */
+void
+_obstack_newchunk (h, length)
+     struct obstack *h;
+     int length;
+{
+  register struct _obstack_chunk *prev_chunk = h->chunk;
+  register struct _obstack_chunk *fresh;
+  register long wanted;
+  /* Bytes of the partial object accumulated so far.  */
+  register int pending = h->next_free - h->object_base;
+
+  /* Size of the new chunk: room for the object and the addition, some
+     slack, and never less than the obstack's regular chunk size.  */
+  wanted = (pending + length) + (pending >> 3) + 100;
+  if (wanted < h->chunk_size)
+    wanted = h->chunk_size;
+
+  /* Allocate the chunk and link it in front of the old one.  */
+  fresh = CALL_CHUNKFUN (h, wanted);
+  if (!fresh)
+    (*obstack_alloc_failed_handler) ();
+  h->chunk = fresh;
+  fresh->prev = prev_chunk;
+  h->chunk_limit = (char *) fresh + wanted;
+  fresh->limit = h->chunk_limit;
+
+  /* Carry the partial object over.  */
+  _obstack_memcpy(fresh->contents, h->object_base, pending);
+
+  /* When the object just copied was the only data in the old chunk,
+     unlink and free that chunk -- unless it might still hold an empty
+     object.  */
+  if (! h->maybe_empty_object && h->object_base == prev_chunk->contents)
+    {
+      fresh->prev = prev_chunk->prev;
+      CALL_FREEFUN (h, prev_chunk);
+    }
+
+  h->object_base = fresh->contents;
+  h->next_free = h->object_base + pending;
+  /* The new chunk certainly contains no empty object yet.  */
+  h->maybe_empty_object = 0;
+}
+
+/* Return nonzero if object OBJ has been allocated from obstack H.
+ This is here for debugging.
+ If you use it in a program, you are probably losing. */
+
+#if defined (__STDC__) && __STDC__
+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
+ obstack.h because it is just for debugging. */
+int _obstack_allocated_p (struct obstack *h, POINTER obj);
+#endif
+
+int
+_obstack_allocated_p (h, obj)
+     struct obstack *h;
+     POINTER obj;
+{
+  register struct _obstack_chunk *lp;	/* chunk being examined */
+
+  /* Walk from the newest chunk towards the oldest and stop at the first
+     one whose range (chunk start, limit] holds OBJ.  The start itself
+     is excluded since no object can sit exactly at the beginning of a
+     chunk, while an empty object may sit exactly at a chunk's limit.  */
+  for (lp = (h)->chunk; lp != 0; lp = lp->prev)
+    if ((POINTER) lp < obj && (POINTER) (lp)->limit >= obj)
+      break;
+
+  return lp != 0;
+}
+
+/* Free objects in obstack H, including OBJ and everything allocated
+ more recently than OBJ. If OBJ is zero, free everything in H. */
+
+#undef obstack_free
+
+/* This function has two names with identical definitions.
+ This is the first one, called from non-ANSI code. */
+
+void
+_obstack_free (h, obj)
+     struct obstack *h;
+     POINTER obj;
+{
+  register struct _obstack_chunk *lp = h->chunk;	/* chunk being examined */
+  register struct _obstack_chunk *older;	/* the chunk before it, if any */
+
+  /* Release chunks, newest first, until one holds OBJ.  The chunk start
+     is excluded from the test because no object can begin there, while
+     an empty object may lie exactly at the limit of another chunk.  */
+  for (;;)
+    {
+      if (lp == 0)
+	break;
+      if ((POINTER) lp < obj && (POINTER) (lp)->limit >= obj)
+	break;
+
+      older = lp->prev;
+      CALL_FREEFUN (h, lp);
+      lp = older;
+      /* If we switch chunks, we can't tell whether the new current
+	 chunk contains an empty object, so assume that it may.  */
+      h->maybe_empty_object = 1;
+    }
+
+  if (lp == 0)
+    {
+      /* Every chunk was released.  That is only legitimate when the
+	 caller asked to free everything (OBJ is zero).  */
+      if (obj != 0)
+	/* obj is not in any of the chunks! */
+	abort ();
+      return;
+    }
+
+  /* LP holds OBJ: it becomes the current chunk, cut back to OBJ.  */
+  h->object_base = h->next_free = (char *) (obj);
+  h->chunk_limit = lp->limit;
+  h->chunk = lp;
+}
+
+/* This function is used from ANSI code. */
+
+/* Second name for _obstack_free.  Both names have the same definition,
+   so this one simply forwards to the other instead of repeating it.  */
+void
+obstack_free (h, obj)
+     struct obstack *h;
+     POINTER obj;
+{
+  _obstack_free (h, obj);
+}
+
+/* Return the total number of bytes spanned by the chunks of obstack H,
+   measured for each chunk from its start to its limit.  */
+int
+_obstack_memory_used (h)
+     struct obstack *h;
+{
+  register struct _obstack_chunk *lp = h->chunk;
+  register int total = 0;
+
+  while (lp != 0)
+    {
+      total += lp->limit - (char *) lp;
+      lp = lp->prev;
+    }
+
+  return total;
+}
+
+/* Define the error handler. */
+#ifndef _
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# ifndef _
+# define _(Str) gettext (Str)
+# endif
+# else
+# define _(Str) (Str)
+# endif
+#endif
+
+/* Default allocation-failure handler: write a message to stderr and
+   terminate through exit with status obstack_exit_failure.  */
+static void
+print_and_abort ()
+{
+  const char *message = _("memory exhausted\n");
+
+  fputs (message, stderr);
+  exit (obstack_exit_failure);
+}
+
+#if 0
+/* These are now turned off because the applications do not use it
+ and it uses bcopy via obstack_grow, which causes trouble on sysV. */
+
+/* Now define the functional versions of the obstack macros.
+ Define them to simply use the corresponding macros to do the job. */
+
+#if defined (__STDC__) && __STDC__
+/* These function definitions do not work with non-ANSI preprocessors;
+ they won't pass through the macro names in parentheses. */
+
+/* The function names appear in parentheses in order to prevent
+ the macro-definitions of the names from being expanded there. */
+
+/* Each definition below gives one obstack macro a function of the same
+ name whose body just invokes that macro on its arguments. The whole
+ group sits inside an `#if 0' region and is therefore not compiled. */
+
+/* Function form of the obstack_base macro. */
+POINTER (obstack_base) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_base (obstack);
+}
+
+/* Function form of the obstack_next_free macro. */
+POINTER (obstack_next_free) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_next_free (obstack);
+}
+
+/* Function form of the obstack_object_size macro. */
+int (obstack_object_size) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_object_size (obstack);
+}
+
+/* Function form of the obstack_room macro. */
+int (obstack_room) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_room (obstack);
+}
+
+/* Function form of the obstack_make_room macro. */
+int (obstack_make_room) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ return obstack_make_room (obstack, length);
+}
+
+/* Function form of the obstack_grow macro. */
+void (obstack_grow) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ obstack_grow (obstack, pointer, length);
+}
+
+/* Function form of the obstack_grow0 macro. */
+void (obstack_grow0) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ obstack_grow0 (obstack, pointer, length);
+}
+
+/* Function form of the obstack_1grow macro. */
+void (obstack_1grow) (obstack, character)
+ struct obstack *obstack;
+ int character;
+{
+ obstack_1grow (obstack, character);
+}
+
+/* Function form of the obstack_blank macro. */
+void (obstack_blank) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ obstack_blank (obstack, length);
+}
+
+/* Function form of the obstack_1grow_fast macro. */
+void (obstack_1grow_fast) (obstack, character)
+ struct obstack *obstack;
+ int character;
+{
+ obstack_1grow_fast (obstack, character);
+}
+
+/* Function form of the obstack_blank_fast macro. */
+void (obstack_blank_fast) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ obstack_blank_fast (obstack, length);
+}
+
+/* Function form of the obstack_finish macro. */
+POINTER (obstack_finish) (obstack)
+ struct obstack *obstack;
+{
+ return obstack_finish (obstack);
+}
+
+/* Function form of the obstack_alloc macro. */
+POINTER (obstack_alloc) (obstack, length)
+ struct obstack *obstack;
+ int length;
+{
+ return obstack_alloc (obstack, length);
+}
+
+/* Function form of the obstack_copy macro. */
+POINTER (obstack_copy) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ return obstack_copy (obstack, pointer, length);
+}
+
+/* Function form of the obstack_copy0 macro. */
+POINTER (obstack_copy0) (obstack, pointer, length)
+ struct obstack *obstack;
+ POINTER pointer;
+ int length;
+{
+ return obstack_copy0 (obstack, pointer, length);
+}
+
+#endif /* __STDC__ */
+
+#endif /* 0 */
+
+#endif /* !ELIDE_CODE */
diff --git a/lib/obstack.h b/lib/obstack.h
new file mode 100644
index 0000000..988ff00
--- /dev/null
+++ b/lib/obstack.h
@@ -0,0 +1,605 @@
+/* obstack.h - object stack macros
+ Copyright (C) 1988,89,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. Its master source is NOT part of
+ the C library, however. The master source lives in /gd/gnu/lib.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Summary:
+
+All the apparent functions defined here are macros. The idea
+is that you would use these pre-tested macros to solve a
+very specific set of problems, and they would run fast.
+Caution: no side-effects in arguments please!! They may be
+evaluated MANY times!!
+
+These macros operate a stack of objects. Each object starts life
+small, and may grow to maturity. (Consider building a word syllable
+by syllable.) An object can move while it is growing. Once it has
+been "finished" it never changes address again. So the "top of the
+stack" is typically an immature growing object, while the rest of the
+stack is of mature, fixed size and fixed address objects.
+
+These routines grab large chunks of memory, using a function you
+supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
+by calling `obstack_chunk_free'. You must define them and declare
+them before using any obstack macros.
+
+Each independent stack is represented by a `struct obstack'.
+Each of the obstack macros expects a pointer to such a structure
+as the first argument.
+
+One motivation for this package is the problem of growing char strings
+in symbol tables. Unless you are a "fascist pig with a read-only mind"
+--Gosper's immortal quote from HAKMEM item 154, out of context--you
+would not like to put any arbitrary upper limit on the length of your
+symbols.
+
+In practice this often means you will build many short symbols and a
+few long symbols. At the time you are reading a symbol you don't know
+how long it is. One traditional method is to read a symbol into a
+buffer, realloc()ating the buffer every time you try to read a symbol
+that is longer than the buffer. This is beaut, but you still will
+want to copy the symbol from the buffer to a more permanent
+symbol-table entry say about half the time.
+
+With obstacks, you can work differently. Use one obstack for all symbol
+names. As you read a symbol, grow the name in the obstack gradually.
+When the name is complete, finalize it. Then, if the symbol exists already,
+free the newly read name.
+
+The way we do this is to take a large chunk, allocating memory from
+low addresses. When you want to build a symbol in the chunk you just
+add chars above the current "high water mark" in the chunk. When you
+have finished adding chars, because you got to the end of the symbol,
+you know how long the chars are, and you can create a new object.
+Mostly the chars will not burst over the highest address of the chunk,
+because you would typically expect a chunk to be (say) 100 times as
+long as an average object.
+
+In case that isn't clear, when we have enough chars to make up
+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
+so we just point to it where it lies. No moving of chars is
+needed and this is the second win: potentially long strings need
+never be explicitly shuffled. Once an object is formed, it does not
+change its address during its lifetime.
+
+When the chars burst over a chunk boundary, we allocate a larger
+chunk, and then copy the partly formed object from the end of the old
+chunk to the beginning of the new larger chunk. We then carry on
+accreting characters to the end of the object as we normally would.
+
+A special macro is provided to add a single char at a time to a
+growing object. This allows the use of register variables, which
+break the ordinary 'growth' macro.
+
+Summary:
+ We allocate large chunks.
+ We carve out one object at a time from the current chunk.
+ Once carved, an object never moves.
+ We are free to append data of any size to the currently
+ growing object.
+ Exactly one object is growing in an obstack at any one time.
+ You can run one obstack per control block.
+ You may have as many control blocks as you dare.
+ Because of the way we do it, you can `unwind' an obstack
+ back to a previous state. (You may remove objects much
+ as you would with a stack.)
+*/
+
+
+/* Don't do the contents of this file more than once. */
+
+#ifndef _OBSTACK_H
+#define _OBSTACK_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* We use subtraction of (char *) 0 instead of casting to int
+ because on word-addressable machines a simple cast to int
+ may ignore the byte-within-word field of the pointer. */
+
+#ifndef __PTR_TO_INT /* keep any definition supplied before this header */
+# define __PTR_TO_INT(P) ((P) - (char *) 0) /* char pointer P as a byte offset from the null pointer */
+#endif
+
+#ifndef __INT_TO_PTR /* keep any definition supplied before this header */
+# define __INT_TO_PTR(P) ((P) + (char *) 0) /* inverse of __PTR_TO_INT: integer P back to a char * */
+#endif
+
+/* We need the type of the resulting object. If __PTRDIFF_TYPE__ is
+ defined, as with GNU C, use that; that way we don't pollute the
+ namespace with <stddef.h>'s symbols. Otherwise, if <stddef.h> is
+ available, include it and use ptrdiff_t. In traditional C, long is
+ the best that we can do. */
+
+#ifdef __PTRDIFF_TYPE__
+# define PTR_INT_TYPE __PTRDIFF_TYPE__ /* compiler-provided type; no header has to be included */
+#else
+# ifdef HAVE_STDDEF_H
+# include <stddef.h>
+# define PTR_INT_TYPE ptrdiff_t /* type of a pointer difference, from <stddef.h> */
+# else
+# define PTR_INT_TYPE long /* fallback when neither of the above is available */
+# endif
+#endif
+
+#if defined _LIBC || defined HAVE_STRING_H /* choose the byte-copy primitive used by the grow macros */
+# include <string.h>
+# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
+#else
+# ifdef memcpy /* memcpy is available as a macro even without <string.h> */
+# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N))
+# else
+# define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N)) /* bcopy takes the source first */
+# endif
+#endif
+
+struct _obstack_chunk /* Header that lives at the front of each chunk. */
+{
+ char *limit; /* 1 past end of this chunk */
+ struct _obstack_chunk *prev; /* address of prior chunk or NULL (obstack_empty_p tests it against 0) */
+ char contents[4]; /* objects begin here; presumably data runs on past these 4 bytes up to limit */
+};
+
+struct obstack /* control current object in current chunk */
+{
+ long chunk_size; /* preferred size to allocate chunks in */
+ struct _obstack_chunk *chunk; /* address of current struct _obstack_chunk */
+ char *object_base; /* address of object we are building */
+ char *next_free; /* where to add next char to current object */
+ char *chunk_limit; /* address of char after current chunk */
+ PTR_INT_TYPE temp; /* Scratch slot used by the non-GNU C macro variants. */
+ int alignment_mask; /* Low bits that must be clear in each object's address. */
+#if defined __STDC__ && __STDC__
+ /* These prototypes vary based on `use_extra_arg', and we use
+ casts to the prototypeless function type in all assignments,
+ but having prototypes here quiets -Wstrict-prototypes. */
+ struct _obstack_chunk *(*chunkfun) (void *, long);
+ void (*freefun) (void *, struct _obstack_chunk *);
+ void *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+#else
+ struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */
+ void (*freefun) (); /* User's function to free a chunk. */
+ char *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+#endif
+ unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
+ unsigned maybe_empty_object:1;/* There is a possibility that the current
+ chunk contains a zero-length object. This
+ prevents freeing the chunk if we allocate
+ a bigger chunk to replace it. */
+ unsigned alloc_failed:1; /* No longer used, as we now call the failed
+ handler on error, but retained for binary
+ compatibility. */
+};
+
+/* Declare the external functions we use; they are in obstack.c. */
+
+#if defined __STDC__ && __STDC__
+extern void _obstack_newchunk (struct obstack *, int);
+extern void _obstack_free (struct obstack *, void *);
+extern int _obstack_begin (struct obstack *, int, int,
+ void *(*) (long), void (*) (void *));
+extern int _obstack_begin_1 (struct obstack *, int, int,
+ void *(*) (void *, long),
+ void (*) (void *, void *), void *);
+extern int _obstack_memory_used (struct obstack *);
+#else
+extern void _obstack_newchunk ();
+extern void _obstack_free ();
+extern int _obstack_begin ();
+extern int _obstack_begin_1 ();
+extern int _obstack_memory_used ();
+#endif
+
+#if defined __STDC__ && __STDC__
+
+/* Do the function-declarations after the structs
+ but before defining the macros. */
+
+void obstack_init (struct obstack *obstack);
+
+void * obstack_alloc (struct obstack *obstack, int size);
+
+void * obstack_copy (struct obstack *obstack, const void *address, int size);
+void * obstack_copy0 (struct obstack *obstack, const void *address, int size);
+
+void obstack_free (struct obstack *obstack, void *block);
+
+void obstack_blank (struct obstack *obstack, int size);
+
+void obstack_grow (struct obstack *obstack, const void *data, int size);
+void obstack_grow0 (struct obstack *obstack, const void *data, int size);
+
+void obstack_1grow (struct obstack *obstack, int data_char);
+void obstack_ptr_grow (struct obstack *obstack, const void *data);
+void obstack_int_grow (struct obstack *obstack, int data);
+
+void * obstack_finish (struct obstack *obstack);
+
+int obstack_object_size (struct obstack *obstack);
+
+int obstack_room (struct obstack *obstack);
+void obstack_make_room (struct obstack *obstack, int size);
+void obstack_1grow_fast (struct obstack *obstack, int data_char);
+void obstack_ptr_grow_fast (struct obstack *obstack, const void *data);
+void obstack_int_grow_fast (struct obstack *obstack, int data);
+void obstack_blank_fast (struct obstack *obstack, int size);
+
+void * obstack_base (struct obstack *obstack);
+void * obstack_next_free (struct obstack *obstack);
+int obstack_alignment_mask (struct obstack *obstack);
+int obstack_chunk_size (struct obstack *obstack);
+int obstack_memory_used (struct obstack *obstack);
+
+#endif /* __STDC__ */
+
+/* Non-ANSI C cannot really support alternative functions for these macros,
+ so we do not declare them. */
+
+/* Error handler called when `obstack_chunk_alloc' fails to allocate
+ more memory. This can be set to a user-defined function, which
+ should either abort gracefully or use longjmp - but shouldn't
+ return. The default action is to print a message and abort. */
+#if defined __STDC__ && __STDC__
+extern void (*obstack_alloc_failed_handler) (void);
+#else
+extern void (*obstack_alloc_failed_handler) ();
+#endif
+
+/* Exit value used when `print_and_abort' is used. */
+extern int obstack_exit_failure;
+
+/* Yield H's object_base: the start of the object being built, or of the
+ next one to be built. This might not be the final address of the object
+ because a new chunk might be needed to hold the final size. */
+
+#define obstack_base(h) ((h)->object_base)
+
+/* Preferred size for allocating ordinary chunks (the chunk_size field of H). */
+
+#define obstack_chunk_size(h) ((h)->chunk_size)
+
+/* Pointer to next byte not yet allocated in current chunk (the next_free field of H). */
+
+#define obstack_next_free(h) ((h)->next_free)
+
+/* Mask of the low bits that should be clear in the address of an object (the alignment_mask field of H). */
+
+#define obstack_alignment_mask(h) ((h)->alignment_mask)
+
+/* To prevent prototype warnings provide complete argument list in
+ standard C version. */
+#if defined __STDC__ && __STDC__
+
+# define obstack_init(h) /* begin using obstack H, passing 0 for both size and alignment */ \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) (long)) obstack_chunk_alloc, /* must be defined by the user of this header */ \
+ (void (*) (void *)) obstack_chunk_free) /* likewise user-defined */
+
+# define obstack_begin(h, size) /* like obstack_init, but passes SIZE instead of 0 */ \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) (long)) obstack_chunk_alloc, /* must be defined by the user of this header */ \
+ (void (*) (void *)) obstack_chunk_free) /* likewise user-defined */
+
+# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) /* begin H with caller-chosen size, alignment and chunk functions */ \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) (long)) (chunkfun), /* cast to the type _obstack_begin is declared with */ \
+ (void (*) (void *)) (freefun))
+
+# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) /* as obstack_specify_allocation, plus ARG handed to _obstack_begin_1 */ \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) (void *, long)) (chunkfun), /* these chunk functions take an extra leading void * */ \
+ (void (*) (void *, void *)) (freefun), (arg))
+
+# define obstack_chunkfun(h, newchunkfun) /* replace H's chunk-allocation function */ \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) /* cast to the field's declared type */
+
+# define obstack_freefun(h, newfreefun) /* replace H's chunk-freeing function */ \
+ ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) /* cast to the field's declared type */
+
+#else
+
+# define obstack_init(h) /* non-ANSI form: begin using H, passing 0 for both size and alignment */ \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) ()) obstack_chunk_alloc, /* must be defined by the user of this header */ \
+ (void (*) ()) obstack_chunk_free) /* likewise user-defined */
+
+# define obstack_begin(h, size) /* non-ANSI form: like obstack_init, but passes SIZE instead of 0 */ \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) ()) obstack_chunk_alloc, /* must be defined by the user of this header */ \
+ (void (*) ()) obstack_chunk_free) /* likewise user-defined */
+
+# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) /* non-ANSI form: begin H with caller-chosen size, alignment and chunk functions */ \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) ()) (chunkfun), /* prototypeless casts: no prototypes in this branch */ \
+ (void (*) ()) (freefun))
+
+# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) /* non-ANSI form: as obstack_specify_allocation, plus ARG handed to _obstack_begin_1 */ \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) ()) (chunkfun), /* prototypeless casts: no prototypes in this branch */ \
+ (void (*) ()) (freefun), (arg))
+
+# define obstack_chunkfun(h, newchunkfun) /* non-ANSI form: replace H's chunk-allocation function */ \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun)) /* cast to the field's declared type */
+
+# define obstack_freefun(h, newfreefun) /* non-ANSI form: replace H's chunk-freeing function */ \
+ ((h) -> freefun = (void (*)()) (newfreefun)) /* cast to the field's declared type */
+
+#endif
+
+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar)) /* store ACHAR and advance next_free; no room check. ACHAR is parenthesized like every other macro argument here */
+
+#define obstack_blank_fast(h,n) ((h)->next_free += (n)) /* advance next_free by N bytes; no room check, nothing written */
+
+#define obstack_memory_used(h) _obstack_memory_used (h) /* forwards to the function declared above, which returns int */
+
+#if defined __GNUC__ && defined __STDC__ && __STDC__
+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
+ does not implement __extension__. But that compiler doesn't define
+ __GNUC_MINOR__. */
+# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) /* compilers described above as lacking __extension__ */
+# define __extension__ /* expands to nothing so the macros below still parse */
+# endif
+
+/* For GNU C, if not -traditional,
+ we can define these macros to compute all args only once
+ without using a global variable.
+ Also, we can avoid using the `temp' slot, to make faster code. */
+
+# define obstack_object_size(OBSTACK) /* size in bytes of the object currently being built */ \
+ __extension__ \
+ ({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ (unsigned) (__o->next_free - __o->object_base); }) /* value of the statement expression */
+
+# define obstack_room(OBSTACK) /* bytes left in the current chunk after next_free */ \
+ __extension__ \
+ ({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ (unsigned) (__o->chunk_limit - __o->next_free); }) /* value of the statement expression */
+
+# define obstack_make_room(OBSTACK,length) /* make sure LENGTH more bytes fit; next_free is not advanced here */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ int __len = (length); /* evaluate LENGTH exactly once */ \
+ if (__o->chunk_limit - __o->next_free < __len) /* fewer than __len bytes left in this chunk */ \
+ _obstack_newchunk (__o, __len); /* declared above; implemented in obstack.c */ \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_empty_p(OBSTACK) /* nonzero when there is no prior chunk and next_free is at the chunk's contents */ \
+ __extension__ \
+ ({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ (__o->chunk->prev == 0 && __o->next_free - __o->chunk->contents == 0); })
+
+# define obstack_grow(OBSTACK,where,length) /* append LENGTH bytes from WHERE to the growing object */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ int __len = (length); /* evaluate LENGTH exactly once */ \
+ if (__o->chunk_limit - __o->next_free < __len) /* compare sizes as obstack_blank does; next_free + __len could point past the chunk */ \
+ _obstack_newchunk (__o, __len); /* declared above; implemented in obstack.c */ \
+ _obstack_memcpy (__o->next_free, (where), __len); /* memcpy or bcopy, chosen near the top of this header */ \
+ __o->next_free += __len; \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_grow0(OBSTACK,where,length) /* append LENGTH bytes from WHERE, then one 0 byte */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ int __len = (length); /* evaluate LENGTH exactly once */ \
+ if (__o->chunk_limit - __o->next_free < __len + 1) /* compare sizes as obstack_blank does; avoids forming a pointer past the chunk */ \
+ _obstack_newchunk (__o, __len + 1); /* +1 leaves room for the terminating 0 */ \
+ _obstack_memcpy (__o->next_free, (where), __len); \
+ __o->next_free += __len; \
+ *(__o->next_free)++ = 0; /* terminator becomes part of the object */ \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_1grow(OBSTACK,datum) /* append the single char DATUM to the growing object */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ if (__o->chunk_limit - __o->next_free < 1) /* compare sizes as obstack_blank does; next_free + 1 may lie past the chunk */ \
+ _obstack_newchunk (__o, 1); /* declared above; implemented in obstack.c */ \
+ *(__o->next_free)++ = (datum); \
+ (void) 0; }) /* the macro has no value */
+
+/* These assume that the obstack alignment is good enough for pointers
+ or ints, and that the data added so far to the current object
+ shares that much alignment. */
+
+# define obstack_ptr_grow(OBSTACK,datum) /* append the pointer DATUM; see the alignment assumption above */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ if (__o->chunk_limit - __o->next_free < (int) sizeof (void *)) /* size comparison; avoids forming a pointer past the chunk */ \
+ _obstack_newchunk (__o, sizeof (void *)); \
+ ((*((void **)__o->next_free) = (datum)), (__o->next_free += sizeof (void *))); \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_int_grow(OBSTACK,datum) /* append the int DATUM; see the alignment assumption above */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ if (__o->chunk_limit - __o->next_free < (int) sizeof (int)) /* size comparison; avoids forming a pointer past the chunk */ \
+ _obstack_newchunk (__o, sizeof (int)); \
+ ((*((int *)__o->next_free) = (datum)), (__o->next_free += sizeof (int ))); \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_ptr_grow_fast(h,aptr) /* store pointer APTR at next_free and advance; no room check, H expanded twice */ \
+ (((*((void **) (h)->next_free) = (aptr)), ( (h)->next_free += sizeof (void *))))
+
+# define obstack_int_grow_fast(h,aint) /* store int AINT at next_free and advance; no room check, H expanded twice */ \
+ (((*((int *) (h)->next_free) = (aint)), ( (h)->next_free += sizeof (int ))))
+
+# define obstack_blank(OBSTACK,length) /* extend the growing object by LENGTH bytes without writing them */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ int __len = (length); /* evaluate LENGTH exactly once */ \
+ if (__o->chunk_limit - __o->next_free < __len) /* fewer than __len bytes left in this chunk */ \
+ _obstack_newchunk (__o, __len); /* declared above; implemented in obstack.c */ \
+ __o->next_free += __len; \
+ (void) 0; }) /* the macro has no value */
+
+# define obstack_alloc(OBSTACK,length) /* make a LENGTH-byte object with unwritten contents; yields its address */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); /* __h differs from the __o / __o1 locals of the macros used below */ \
+ obstack_blank (__h, (length)); \
+ obstack_finish (__h); }) /* value of the statement expression */
+
+# define obstack_copy(OBSTACK,where,length) /* make an object holding LENGTH bytes copied from WHERE; yields its address */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); /* __h differs from the __o / __o1 locals of the macros used below */ \
+ obstack_grow (__h, (where), (length)); \
+ obstack_finish (__h); }) /* value of the statement expression */
+
+# define obstack_copy0(OBSTACK,where,length) /* like obstack_copy, with a 0 byte appended after the copied bytes */ \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); /* __h differs from the __o / __o1 locals of the macros used below */ \
+ obstack_grow0 (__h, (where), (length)); \
+ obstack_finish (__h); }) /* value of the statement expression */
+
+/* The local variable is named __o1 to avoid a name conflict
+ when obstack_blank is called. */
+# define obstack_finish(OBSTACK) /* close the growing object and yield its base address */ \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ void *value; \
+ value = (void *) __o1->object_base; /* address handed back to the caller */ \
+ if (__o1->next_free == value) /* the finished object has zero length */ \
+ __o1->maybe_empty_object = 1; \
+ __o1->next_free /* round next_free up to the alignment boundary */ \
+ = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\
+ & ~ (__o1->alignment_mask)); \
+ if (__o1->next_free - (char *)__o1->chunk /* rounding went past the end of the chunk */ \
+ > __o1->chunk_limit - (char *)__o1->chunk) \
+ __o1->next_free = __o1->chunk_limit; /* clamp to the chunk end */ \
+ __o1->object_base = __o1->next_free; /* the next object starts here */ \
+ value; }) /* value of the statement expression */
+
+# define obstack_free(OBSTACK, OBJ) /* unwind the obstack back to OBJ */ \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); /* evaluate OBSTACK exactly once */ \
+ void *__obj = (OBJ); /* evaluate OBJ exactly once */ \
+ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) /* OBJ lies inside the current chunk */ \
+ __o->next_free = __o->object_base = (char *)__obj; /* fast path: just reset both pointers */ \
+ else (obstack_free) (__o, __obj); }) /* parenthesized name: calls the function, not this macro */
+
+#else /* not __GNUC__ or not __STDC__ */
+
+# define obstack_object_size(h) /* size in bytes of the object being built; H is expanded twice */ \
+ ((unsigned) ((h)->next_free - (h)->object_base)) /* outer parentheses keep the cast intact inside larger expressions */
+
+# define obstack_room(h) /* bytes left in the current chunk after next_free; H is expanded twice */ \
+ ((unsigned) ((h)->chunk_limit - (h)->next_free)) /* outer parentheses keep the cast intact inside larger expressions */
+
+# define obstack_empty_p(h) /* nonzero when there is no prior chunk and next_free is at the chunk's contents */ \
+ ((h)->chunk->prev == 0 && (h)->next_free - (h)->chunk->contents == 0)
+
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+ so that we can avoid having void expressions
+ in the arms of the conditional expression.
+ Casting the third operand to void was tried before,
+ but some compilers won't accept it. */
+
+# define obstack_make_room(h,length) /* make sure LENGTH more bytes fit; next_free is not advanced here */ \
+( (h)->temp = (length), /* stash LENGTH so it is evaluated once */ \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp) /* size comparison, as obstack_blank does; avoids a pointer past the chunk */ \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0))
+
+# define obstack_grow(h,where,length) /* append LENGTH bytes from WHERE to the growing object */ \
+( (h)->temp = (length), /* stash LENGTH so it is evaluated once */ \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp) /* size comparison, as obstack_blank does; avoids a pointer past the chunk */ \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \
+ _obstack_memcpy ((h)->next_free, (where), (h)->temp), /* memcpy or bcopy, chosen near the top of this header */ \
+ (h)->next_free += (h)->temp)
+
+# define obstack_grow0(h,where,length) /* append LENGTH bytes from WHERE, then one 0 byte */ \
+( (h)->temp = (length), /* stash LENGTH so it is evaluated once */ \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp + 1) /* size comparison, as obstack_blank does; +1 covers the terminator */ \
+ ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \
+ _obstack_memcpy ((h)->next_free, (where), (h)->temp), \
+ (h)->next_free += (h)->temp, \
+ *((h)->next_free)++ = 0) /* terminator becomes part of the object */
+
+# define obstack_1grow(h,datum) /* append the single char DATUM to the growing object */ \
+( (((h)->chunk_limit - (h)->next_free < 1) /* size comparison, as obstack_blank does; next_free + 1 may lie past the chunk */ \
+ ? (_obstack_newchunk ((h), 1), 0) : 0), \
+ (*((h)->next_free)++ = (datum)))
+
+# define obstack_ptr_grow(h,datum) /* append the pointer DATUM to the growing object */ \
+( (((h)->chunk_limit - (h)->next_free < (int) sizeof (char *)) /* size comparison; avoids forming a pointer past the chunk */ \
+ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
+ (*((const char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = (datum))) /* advance first, then store into the slot just skipped */
+
+# define obstack_int_grow(h,datum) /* append the int DATUM to the growing object */ \
+( (((h)->chunk_limit - (h)->next_free < (int) sizeof (int)) /* size comparison; avoids forming a pointer past the chunk */ \
+ ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
+ (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = (datum))) /* advance first, then store into the slot just skipped */
+
+# define obstack_ptr_grow_fast(h,aptr) /* store pointer APTR at next_free and advance; no room check, H expanded twice */ \
+ (((*((const char **) (h)->next_free) = (aptr)), ( (h)->next_free += sizeof (const char *))))
+
+# define obstack_int_grow_fast(h,aint) /* store int AINT at next_free and advance; no room check, H expanded twice */ \
+ (((*((int *) (h)->next_free) = (aint)), ( (h)->next_free += sizeof (int ))))
+
+# define obstack_blank(h,length) /* extend the growing object by LENGTH bytes without writing them */ \
+( (h)->temp = (length), /* stash LENGTH so it is evaluated once */ \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp) /* fewer than temp bytes left in this chunk */ \
+ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), /* the (..., 0) form is explained in the note above */ \
+ ((h)->next_free += (h)->temp))
+
+# define obstack_alloc(h,length) /* make a LENGTH-byte object with unwritten contents; yields obstack_finish's value */ \
+ (obstack_blank ((h), (length)), obstack_finish ((h)))
+
+# define obstack_copy(h,where,length) /* make an object holding LENGTH bytes copied from WHERE; yields obstack_finish's value */ \
+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_copy0(h,where,length) /* like obstack_copy, with a 0 byte appended after the copied bytes */ \
+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_finish(h) /* close the growing object and yield its base address; H is expanded many times */ \
+( ((h)->next_free == (h)->object_base /* the finished object has zero length */ \
+ ? (((h)->maybe_empty_object = 1), 0) \
+ : 0), \
+ (h)->temp = __PTR_TO_INT ((h)->object_base), /* remember the address to hand back */ \
+ (h)->next_free /* round next_free up to the alignment boundary */ \
+ = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \
+ & ~ ((h)->alignment_mask)), \
+ (((h)->next_free - (char *) (h)->chunk /* rounding went past the end of the chunk */ \
+ > (h)->chunk_limit - (char *) (h)->chunk) \
+ ? ((h)->next_free = (h)->chunk_limit) : 0), /* clamp to the chunk end */ \
+ (h)->object_base = (h)->next_free, /* the next object starts here */ \
+ __INT_TO_PTR ((h)->temp)) /* value of the whole comma expression */
+
+# if defined __STDC__ && __STDC__
+# define obstack_free(h,obj) /* unwind the obstack back to OBJ (ANSI C form) */ \
+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, /* offset of OBJ from the start of the current chunk */ \
+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)/* OBJ lies inside the current chunk */\
+ ? (int) ((h)->next_free = (h)->object_base /* fast path: just reset both pointers */ \
+ = (h)->temp + (char *) (h)->chunk) \
+ : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0))) /* parenthesized name: calls the function, not this macro */
+# else
+# define obstack_free(h,obj) /* unwind the obstack back to OBJ (non-ANSI form) */ \
+( (h)->temp = (char *) (obj) - (char *) (h)->chunk, /* offset of OBJ from the start of the current chunk */ \
+ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)/* OBJ lies inside the current chunk */\
+ ? (int) ((h)->next_free = (h)->object_base /* fast path: just reset both pointers */ \
+ = (h)->temp + (char *) (h)->chunk) \
+ : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0))) /* otherwise call _obstack_free, declared above */
+# endif
+
+#endif /* not __GNUC__ or not __STDC__ */
+
+#ifdef __cplusplus
+} /* C++ */
+#endif
+
+#endif /* obstack.h */
diff --git a/lib/regcomp.c b/lib/regcomp.c
new file mode 100644
index 0000000..96b63a4
--- /dev/null
+++ b/lib/regcomp.c
@@ -0,0 +1,3793 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
+ int length, reg_syntax_t syntax);
+static void re_compile_fastmap_iter (regex_t *bufp,
+ const re_dfastate_t *init_state,
+ char *fastmap);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
+static void init_word_char (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void free_charset (re_charset_t *cset);
+#endif /* RE_ENABLE_I18N */
+static void free_workarea_compile (regex_t *preg);
+static reg_errcode_t create_initial_state (re_dfa_t *dfa);
+#ifdef RE_ENABLE_I18N
+static void optimize_utf8 (re_dfa_t *dfa);
+#endif
+static reg_errcode_t analyze (re_dfa_t *dfa);
+static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
+static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
+ int top_clone_node, int root_node,
+ unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+ unsigned int constraint);
+static int search_duplicated_node (re_dfa_t *dfa, int org_node,
+ unsigned int constraint);
+static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
+static void calc_inveclosure (re_dfa_t *dfa);
+static int fetch_number (re_string_t *input, re_token_t *token,
+ reg_syntax_t syntax);
+static void fetch_token (re_token_t *result, re_string_t *input,
+ reg_syntax_t syntax);
+static int peek_token (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax);
+static int peek_token_bracket (re_token_t *token, re_string_t *input,
+ reg_syntax_t syntax);
+static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
+ re_token_t *token, reg_syntax_t syntax,
+ int nest, reg_errcode_t *err);
+static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
+ re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err);
+static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
+ re_token_t *token, reg_syntax_t syntax,
+ reg_errcode_t *err);
+static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token, int token_len,
+ re_dfa_t *dfa,
+ reg_syntax_t syntax,
+ int accept_hyphen);
+static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
+ re_string_t *regexp,
+ re_token_t *token);
+#ifndef _LIBC
+# ifdef RE_ENABLE_I18N
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset, int *range_alloc,
+ bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *coll_sym_alloc,
+ const unsigned char *name);
+# else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset,
+ bracket_elem_t *start_elem,
+ bracket_elem_t *end_elem);
+static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
+ const unsigned char *name);
+# endif /* not RE_ENABLE_I18N */
+#endif /* not _LIBC */
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *equiv_class_alloc,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
+ re_bitset_ptr_t sbcset,
+ re_charset_t *mbcset,
+ int *char_class_alloc,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#else /* not RE_ENABLE_I18N */
+static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+ const unsigned char *name);
+static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans,
+ re_bitset_ptr_t sbcset,
+ const unsigned char *class_name,
+ reg_syntax_t syntax);
+#endif /* not RE_ENABLE_I18N */
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
+ unsigned RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra,
+ int non_match, reg_errcode_t *err);
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ re_token_type_t type, int index);
+static bin_tree_t *re_dfa_add_tree_node (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token)
+ __attribute ((noinline));
+static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
+static void mark_opt_subexp (const bin_tree_t *src, re_dfa_t *dfa);
+static void mark_opt_subexp_iter (const bin_tree_t *src, re_dfa_t *dfa, int idx);
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. The order here has to be the same as there. Each
+ REG_*_IDX macro is the byte offset of the corresponding message
+ inside the concatenated string.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+const char __re_error_msgid[] attribute_hidden =
+ {
+#define REG_NOERROR_IDX 0
+ gettext_noop ("Success") /* REG_NOERROR */
+ "\0"
+#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
+ gettext_noop ("No match") /* REG_NOMATCH */
+ "\0"
+#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
+ gettext_noop ("Invalid regular expression") /* REG_BADPAT */
+ "\0"
+#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
+ gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
+ "\0"
+#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
+ gettext_noop ("Invalid character class name") /* REG_ECTYPE */
+ "\0"
+#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
+ gettext_noop ("Trailing backslash") /* REG_EESCAPE */
+ "\0"
+#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
+ gettext_noop ("Invalid back reference") /* REG_ESUBREG */
+ "\0"
+#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
+ gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+ "\0"
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+ gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
+ "\0"
+#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
+ gettext_noop ("Unmatched \\{") /* REG_EBRACE */
+ "\0"
+#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
+ gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
+ "\0"
+#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
+ gettext_noop ("Invalid range end") /* REG_ERANGE */
+ "\0"
+#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
+ gettext_noop ("Memory exhausted") /* REG_ESPACE */
+ "\0"
+#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
+ gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
+ "\0"
+#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
+ gettext_noop ("Premature end of regular expression") /* REG_EEND */
+ "\0"
+#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
+ gettext_noop ("Regular expression too big") /* REG_ESIZE */
+ "\0"
+#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
+ gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
+ };
+
+/* Offset of each message inside __re_error_msgid. The array is indexed
+ by the numeric error code, so the entries must stay in error-code
+ order. */
+const size_t __re_error_msgid_idx[] attribute_hidden =
+ {
+ REG_NOERROR_IDX,
+ REG_NOMATCH_IDX,
+ REG_BADPAT_IDX,
+ REG_ECOLLATE_IDX,
+ REG_ECTYPE_IDX,
+ REG_EESCAPE_IDX,
+ REG_ESUBREG_IDX,
+ REG_EBRACK_IDX,
+ REG_EPAREN_IDX,
+ REG_EBRACE_IDX,
+ REG_BADBR_IDX,
+ REG_ERANGE_IDX,
+ REG_ESPACE_IDX,
+ REG_BADRPT_IDX,
+ REG_EEND_IDX,
+ REG_ESIZE_IDX,
+ REG_ERPAREN_IDX
+ };
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     size_t length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors also match at newlines.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU callers ask for register information by passing a non-null REGS
+     argument to re_match and friends, never through no_sub, so clear it.  */
+  bufp->no_sub = 0;
+
+  status = re_compile_internal (bufp, pattern, length, re_syntax_options);
+
+  /* A failed compilation is reported as the translated message text;
+     success is reported as a null pointer.  */
+  if (status != REG_NOERROR)
+    return gettext (__re_error_msgid + __re_error_msgid_idx[(int) status]);
+
+  return NULL;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit mask comprised of the various bits
+ defined in regex.h. We return the old syntax. */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  /* Install the new syntax bits and hand the previous ones back.  */
+  const reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+
+  return previous;
+}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
+
+/* Fill BUFP->fastmap from every distinct initial state of the compiled
+   DFA and mark the fastmap as accurate.  Always returns 0.  */
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  char *map = bufp->fastmap;
+  const re_dfastate_t *contextual[3];
+  int k;
+
+  memset (map, '\0', sizeof (char) * SBC_MAX);
+
+  /* The context-free initial state always contributes.  */
+  re_compile_fastmap_iter (bufp, dfa->init_state, map);
+
+  /* The context-specific initial states contribute only when they are
+     not the very same state object.  */
+  contextual[0] = dfa->init_state_word;
+  contextual[1] = dfa->init_state_nl;
+  contextual[2] = dfa->init_state_begbuf;
+  for (k = 0; k < 3; ++k)
+    if (contextual[k] != dfa->init_state)
+      re_compile_fastmap_iter (bufp, contextual[k], map);
+
+  bufp->fastmap_accurate = 1;
+  return 0;
+}
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
+
+/* Mark byte CH in FASTMAP; when ICASE is nonzero also mark the byte that
+   tolower maps CH to.  */
+static inline void
+__attribute ((always_inline))
+re_set_fastmap (char *fastmap, int icase, int ch)
+{
+  fastmap[ch] = 1;
+  if (!icase)
+    return;
+  fastmap[tolower (ch)] = 1;
+}
+
+/* Helper function for re_compile_fastmap.
+ Compile fastmap for the initial_state INIT_STATE. */
+
+/* Add to FASTMAP every byte that can start a match from INIT_STATE, one
+   of the initial states of the DFA stored in BUFP->buffer.  If the state
+   contains a node that can match any byte, or END_OF_RE, the whole
+   fastmap is set and the walk stops; for END_OF_RE, BUFP->can_be_null is
+   set as well.  */
+static void
+re_compile_fastmap_iter (bufp, init_state, fastmap)
+     regex_t *bufp;
+     const re_dfastate_t *init_state;
+     char *fastmap;
+{
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  int node_cnt;
+  /* In single-byte locales re_set_fastmap does the case folding.  */
+  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
+  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
+    {
+      int node = init_state->nodes.elems[node_cnt];
+      re_token_type_t type = dfa->nodes[node].type;
+
+      if (type == CHARACTER)
+        {
+          re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
+#ifdef RE_ENABLE_I18N
+          if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+            {
+              unsigned char *buf = alloca (dfa->mb_cur_max), *p;
+              wchar_t wc;
+              mbstate_t state;
+
+              /* Collect this byte and the mb_partial CHARACTER nodes that
+                 follow it, so the sequence can be converted to a wide
+                 character and lower-cased.  */
+              p = buf;
+              *p++ = dfa->nodes[node].opr.c;
+              while (++node < dfa->nodes_len
+                     && dfa->nodes[node].type == CHARACTER
+                     && dfa->nodes[node].mb_partial)
+                *p++ = dfa->nodes[node].opr.c;
+              memset (&state, 0, sizeof (state));
+              /* wcrtomb reports failure as (size_t) -1, which an
+                 unsigned "> 0" test cannot detect; compare against the
+                 error value explicitly so BUF is read only when it was
+                 actually written.  */
+              if (mbrtowc (&wc, (const char *) buf, p - buf,
+                           &state) == p - buf
+                  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+                      != (size_t) -1))
+                re_set_fastmap (fastmap, 0, buf[0]);
+            }
+#endif
+        }
+      else if (type == SIMPLE_BRACKET)
+        {
+          int i, j, ch;
+          /* Use an unsigned constant: shifting a signed 1 into the top
+             bit is undefined behavior.  */
+          for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+            for (j = 0; j < UINT_BITS; ++j, ++ch)
+              if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
+                re_set_fastmap (fastmap, icase, ch);
+        }
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET)
+        {
+          int i;
+          re_charset_t *cset = dfa->nodes[node].opr.mbcset;
+          if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
+              || cset->nranges || cset->nchar_classes)
+            {
+# ifdef _LIBC
+              if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
+                {
+                  /* In this case we want to catch the bytes which are
+                     the first byte of any collation elements.
+                     e.g. In da_DK, we want to catch 'a' since "aa"
+                     is a valid collation element, and don't catch
+                     'b' since 'b' is the only collation element
+                     which starts from 'b'.  */
+                  int j, ch;
+                  const int32_t *table = (const int32_t *)
+                    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+                  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+                    for (j = 0; j < UINT_BITS; ++j, ++ch)
+                      if (table[ch] < 0)
+                        re_set_fastmap (fastmap, icase, ch);
+                }
+# else
+              if (dfa->mb_cur_max > 1)
+                for (i = 0; i < SBC_MAX; ++i)
+                  if (__btowc (i) == WEOF)
+                    re_set_fastmap (fastmap, icase, i);
+# endif /* not _LIBC */
+            }
+          for (i = 0; i < cset->nmbchars; ++i)
+            {
+              char buf[256];
+              mbstate_t state;
+              memset (&state, '\0', sizeof (state));
+              /* Only trust BUF when the conversion succeeded; otherwise
+                 its first byte is uninitialized.  */
+              if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+                re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+              if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+                {
+                  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+                      != (size_t) -1)
+                    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+                }
+            }
+        }
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_PERIOD
+#ifdef RE_ENABLE_I18N
+               || type == OP_UTF8_PERIOD
+#endif /* RE_ENABLE_I18N */
+               || type == END_OF_RE)
+        {
+          /* Any byte may start a match; nothing further can be learned.  */
+          memset (fastmap, '\1', sizeof (char) * SBC_MAX);
+          if (type == END_OF_RE)
+            bufp->can_be_null = 1;
+          return;
+        }
+    }
+}
+
+/* Entry point for POSIX code. */
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *__restrict preg;
+     const char *__restrict pattern;
+     int cflags;
+{
+  reg_errcode_t status;
+  reg_syntax_t syntax;
+
+  /* Choose the POSIX flavour requested by the caller.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* Nothing in PREG is assumed to be initialized on entry.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* The fastmap is allocated up front; without it we give up at once.  */
+  preg->fastmap = re_malloc (char, SBC_MAX);
+  if (BE (preg->fastmap == NULL, 0))
+    return REG_ESPACE;
+
+  if (cflags & REG_ICASE)
+    syntax |= RE_ICASE;
+
+  /* With REG_NEWLINE neither `.' nor `[^...]' matches a newline, and
+     anchors match at newlines.  */
+  if (cflags & REG_NEWLINE)
+    {
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+    }
+  preg->newline_anchor = (cflags & REG_NEWLINE) ? 1 : 0;
+  preg->no_sub = (cflags & REG_NOSUB) ? 1 : 0;
+  preg->translate = NULL;
+
+  status = re_compile_internal (preg, pattern, strlen (pattern), syntax);
+
+  /* POSIX has a single code for unmatched parentheses of either kind.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  if (BE (status != REG_NOERROR, 0))
+    {
+      /* Compilation failed: the fastmap is of no use to anybody.  */
+      re_free (preg->fastmap);
+      preg->fastmap = NULL;
+    }
+  else
+    {
+      /* regexec cannot modify the pattern buffer, so the fastmap is
+         computed now.  preg->fastmap is known to be non-null here.  */
+      (void) re_compile_fastmap (preg);
+    }
+
+  return (int) status;
+}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     const regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+{
+  const int known_codes = (int) (sizeof (__re_error_msgid_idx)
+                                 / sizeof (__re_error_msgid_idx[0]));
+  const char *text;
+  size_t needed;
+
+  /* An error code outside the table can only come from a bug in the
+     caller or in the regex code itself; abort so it gets noticed.  */
+  if (BE (errcode < 0 || errcode >= known_codes, 0))
+    abort ();
+
+  text = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
+  needed = strlen (text) + 1;   /* Counts the terminating null.  */
+
+  /* With no room at all, just report the size that would be needed.  */
+  if (BE (errbuf_size == 0, 0))
+    return needed;
+
+  if (BE (needed <= errbuf_size, 1))
+    memcpy (errbuf, text, needed);
+  else
+    {
+      /* Truncate the message, always leaving a terminated string.  */
+      memcpy (errbuf, text, errbuf_size - 1);
+      errbuf[errbuf_size - 1] = 0;
+    }
+
+  return needed;
+}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
+
+
+/* Release everything owned by DFA and then DFA itself. Per-node data is
+ released before the arrays that hold it. */
+static void
+free_dfa_content (re_dfa_t *dfa)
+{
+ int i, j;
+
+ re_free (dfa->subexps);
+
+ /* Bracket nodes own their character sets unless they are duplicates. */
+ if (dfa->nodes)
+ for (i = 0; i < dfa->nodes_len; ++i)
+ {
+ re_token_t *node = dfa->nodes + i;
+#ifdef RE_ENABLE_I18N
+ if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+ free_charset (node->opr.mbcset);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
+ }
+ re_free (dfa->nexts);
+ /* Free the node sets stored in each per-node array that exists.
+ NOTE(review): this assumes the first nodes_len entries of every
+ non-null array have been initialized -- confirm for all callers. */
+ for (i = 0; i < dfa->nodes_len; ++i)
+ {
+ if (dfa->eclosures != NULL)
+ re_node_set_free (dfa->eclosures + i);
+ if (dfa->inveclosures != NULL)
+ re_node_set_free (dfa->inveclosures + i);
+ if (dfa->edests != NULL)
+ re_node_set_free (dfa->edests + i);
+ }
+ re_free (dfa->edests);
+ re_free (dfa->eclosures);
+ re_free (dfa->inveclosures);
+ re_free (dfa->nodes);
+
+ /* Free every state in every hash bucket, then the buckets and table. */
+ if (dfa->state_table)
+ for (i = 0; i <= dfa->state_hash_mask; ++i)
+ {
+ struct re_state_table_entry *entry = dfa->state_table + i;
+ for (j = 0; j < entry->num; ++j)
+ {
+ re_dfastate_t *state = entry->array[j];
+ free_state (state);
+ }
+ re_free (entry->array);
+ }
+ re_free (dfa->state_table);
+#ifdef RE_ENABLE_I18N
+ re_free (dfa->sb_char);
+#endif
+#ifdef DEBUG
+ re_free (dfa->re_str);
+#endif
+
+ re_free (dfa);
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+void
+regfree (preg)
+     regex_t *preg;
+{
+  re_dfa_t *compiled = (re_dfa_t *) preg->buffer;
+
+  /* Release the compiled automaton, the fastmap and the translate table.  */
+  if (BE (compiled != NULL, 1))
+    free_dfa_content (compiled);
+  re_free (preg->fastmap);
+  re_free (preg->translate);
+
+  /* Leave PREG holding no stale pointers.  */
+  preg->buffer = NULL;
+  preg->allocated = 0;
+  preg->fastmap = NULL;
+  preg->translate = NULL;
+}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into the single static buffer re_comp_buf.
+ Returns a null pointer on success, otherwise a message string.
+ A null S compiles nothing: it only checks that a pattern was compiled
+ earlier. */
+char *
+# ifdef _LIBC
+/* Make these definitions weak in libc, so POSIX programs can redefine
+ these names if they don't use our functions, and still use
+ regcomp/regexec above without link errors. */
+weak_function
+# endif
+re_comp (s)
+ const char *s;
+{
+ reg_errcode_t ret;
+ char *fastmap;
+
+ if (!s)
+ {
+ if (!re_comp_buf.buffer)
+ return gettext ("No previous regular expression");
+ return 0;
+ }
+
+ /* Discard the previous pattern but keep its fastmap allocation: the
+ pointer is detached so __regfree does not free it, and is put back
+ after the buffer has been cleared. */
+ if (re_comp_buf.buffer)
+ {
+ fastmap = re_comp_buf.fastmap;
+ re_comp_buf.fastmap = NULL;
+ __regfree (&re_comp_buf);
+ memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
+ re_comp_buf.fastmap = fastmap;
+ }
+
+ /* Allocate the fastmap if there is none yet. */
+ if (re_comp_buf.fastmap == NULL)
+ {
+ re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
+ if (re_comp_buf.fastmap == NULL)
+ return (char *) gettext (__re_error_msgid
+ + __re_error_msgid_idx[(int) REG_ESPACE]);
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
+
+ if (!ret)
+ return NULL;
+
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+ return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
+}
+
+#ifdef _LIBC
+/* Release whatever the static re_comp buffer still holds. */
+libc_freeres_fn (free_mem)
+{
+ __regfree (&re_comp_buf);
+}
+#endif
+
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point.
+ Compile the regular expression PATTERN, whose length is LENGTH.
+ SYNTAX indicates the regular expression's syntax. */
+
+/* Compile PATTERN (LENGTH bytes) into PREG using SYNTAX.
+   PREG->buffer is reused as the DFA when PREG->allocated says it is
+   large enough, and (re)allocated otherwise.  On any failure the DFA is
+   released, PREG->buffer is reset to NULL and PREG->allocated to 0, and
+   the error code is returned; on success REG_NOERROR is returned.  */
+static reg_errcode_t
+re_compile_internal (preg, pattern, length, syntax)
+     regex_t *preg;
+     const char * pattern;
+     int length;
+     reg_syntax_t syntax;
+{
+  reg_errcode_t err = REG_NOERROR;
+  re_dfa_t *dfa;
+  re_string_t regexp;
+
+  /* Initialize the pattern buffer.  */
+  preg->fastmap_accurate = 0;
+  preg->syntax = syntax;
+  preg->not_bol = preg->not_eol = 0;
+  preg->used = 0;
+  preg->re_nsub = 0;
+  preg->can_be_null = 0;
+  preg->regs_allocated = REGS_UNALLOCATED;
+
+  /* Initialize the dfa.  */
+  dfa = (re_dfa_t *) preg->buffer;
+  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
+    {
+      /* If zero allocated, but buffer is non-null, try to realloc
+         enough space.  This loses if buffer's address is bogus, but
+         that is the user's responsibility.  If ->buffer is NULL this
+         is a simple allocation.  */
+      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
+      if (dfa == NULL)
+        return REG_ESPACE;
+      preg->allocated = sizeof (re_dfa_t);
+      preg->buffer = (unsigned char *) dfa;
+    }
+  preg->used = sizeof (re_dfa_t);
+
+  err = init_dfa (dfa, length);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+#ifdef DEBUG
+  /* Keep a terminated copy of the pattern for debugging.  The allocation
+     is checked, and exactly LENGTH bytes are copied so that a pattern
+     without a terminating null is never read past its end.  */
+  dfa->re_str = re_malloc (char, length + 1);
+  if (BE (dfa->re_str == NULL, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return REG_ESPACE;
+    }
+  memcpy (dfa->re_str, pattern, length);
+  dfa->re_str[length] = '\0';
+#endif
+
+  err = re_string_construct (&regexp, pattern, length, preg->translate,
+                             syntax & RE_ICASE, dfa);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Shared failure exit: later stages jump here with ERR set.  */
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+      return err;
+    }
+
+  /* Parse the regular expression, and build a structure tree.  */
+  preg->re_nsub = 0;
+  dfa->str_tree = parse (&regexp, preg, syntax, &err);
+  if (BE (dfa->str_tree == NULL, 0))
+    goto re_compile_internal_free_return;
+
+#ifdef RE_ENABLE_I18N
+  /* If possible, do searching in single byte encoding to speed things up.  */
+  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
+    optimize_utf8 (dfa);
+#endif
+
+  /* Analyze the tree and collect information which is necessary to
+     create the dfa.  */
+  err = analyze (dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
+  /* Then create the initial state of the dfa.  */
+  err = create_initial_state (dfa);
+
+  /* Release work areas.  */
+  free_workarea_compile (preg);
+  re_string_destruct (&regexp);
+
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_dfa_content (dfa);
+      preg->buffer = NULL;
+      preg->allocated = 0;
+    }
+
+  return err;
+}
+
+/* Initialize DFA. We use the length of the regular expression PAT_LEN
+ as the initial length of some arrays. */
+
+/* Clear DFA and allocate its node array, state hash table and
+   subexpression array, sized from PAT_LEN.  Returns REG_ESPACE if an
+   allocation failed, REG_NOERROR otherwise.  */
+static reg_errcode_t
+init_dfa (dfa, pat_len)
+     re_dfa_t *dfa;
+     int pat_len;
+{
+  int table_size;
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
+  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
+
+  dfa->states_alloc = pat_len + 1;
+
+  /* table_size = the smallest power of two greater than pat_len.  */
+  for (table_size = 1; table_size > 0; table_size <<= 1)
+    if (table_size > pat_len)
+      break;
+
+  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
+  dfa->state_hash_mask = table_size - 1;
+
+  dfa->subexps_alloc = 1;
+  dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
+
+  dfa->mb_cur_max = MB_CUR_MAX;
+#ifdef _LIBC
+  if (dfa->mb_cur_max == 6
+      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
+    dfa->is_utf8 = 1;
+  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
+                       != 0);
+#endif
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      int i, j, ch;
+
+      /* sb_char records which byte values are complete characters on
+         their own.  */
+      dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+      if (BE (dfa->sb_char == NULL, 0))
+        return REG_ESPACE;
+      if (dfa->is_utf8)
+        /* Set the lower half of the bitset in one go.  */
+        memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+      else
+        for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+          for (j = 0; j < UINT_BITS; ++j, ++ch)
+            if (btowc (ch) != WEOF)
+              /* Unsigned constant: shifting a signed 1 into the top bit
+                 is undefined behavior.  */
+              dfa->sb_char[i] |= 1u << j;
+    }
+#endif
+
+  /* The allocations above are checked together here.  */
+  if (BE (dfa->nodes == NULL || dfa->state_table == NULL
+          || dfa->subexps == NULL, 0))
+    return REG_ESPACE;
+  return REG_NOERROR;
+}
+
+/* Initialize the WORD_CHAR table, which indicates which characters are
+ "word" characters. Here "word" means a word-constituent character as
+ used by operators such as "\<", "\>", etc. */
+
+/* Set DFA->word_ops_used and set, in DFA->word_char, the bit of every
+   byte value that is alphanumeric or an underscore.  */
+static void
+init_word_char (dfa)
+     re_dfa_t *dfa;
+{
+  int i, j, ch;
+  dfa->word_ops_used = 1;
+  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+    for (j = 0; j < UINT_BITS; ++j, ++ch)
+      if (isalnum (ch) || ch == '_')
+        /* Unsigned constant: shifting a signed 1 into the top bit is
+           undefined behavior.  */
+        dfa->word_char[i] |= 1u << j;
+}
+
+/* Free the work areas which are only used while compiling. */
+
+static void
+free_workarea_compile (preg)
+     regex_t *preg;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_storage_t *chunk = dfa->str_tree_storage;
+
+  /* Walk the list of parse-tree storage chunks, freeing each one.  */
+  while (chunk != NULL)
+    {
+      bin_tree_storage_t *following = chunk->next;
+      re_free (chunk);
+      chunk = following;
+    }
+
+  /* Reset the tree bookkeeping to its "nothing allocated" state.  */
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+  dfa->str_tree = NULL;
+
+  re_free (dfa->org_indices);
+  dfa->org_indices = NULL;
+}
+
+/* Create initial states for all contexts. */
+
+/* Build the initial DFA states for every context and store them in DFA.
+   Returns REG_NOERROR on success, otherwise the error reported by the
+   failing helper.  The temporary node set is released on every path.  */
+static reg_errcode_t
+create_initial_state (dfa)
+     re_dfa_t *dfa;
+{
+  int first, i;
+  reg_errcode_t err;
+  re_node_set init_nodes;
+
+  /* Initial states have the epsilon closure of the node which is
+     the first node of the regular expression.  */
+  first = dfa->str_tree->first;
+  dfa->init_node = first;
+  err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* The back-references which are in initial states can epsilon transit,
+     since in this case all of the subexpressions can be null.
+     Then we add epsilon closures of the nodes which are the next nodes of
+     the back-references.  */
+  if (dfa->nbackref > 0)
+    for (i = 0; i < init_nodes.nelem; ++i)
+      {
+        int node_idx = init_nodes.elems[i];
+        re_token_type_t type = dfa->nodes[node_idx].type;
+
+        int clexp_idx;
+        if (type != OP_BACK_REF)
+          continue;
+        /* Look for the close-subexpression node this back-reference
+           refers to among the initial nodes.  */
+        for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
+          {
+            re_token_t *clexp_node;
+            clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
+            if (clexp_node->type == OP_CLOSE_SUBEXP
+                && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
+              break;
+          }
+        if (clexp_idx == init_nodes.nelem)
+          continue;
+
+        if (type == OP_BACK_REF)
+          {
+            int dest_idx = dfa->edests[node_idx].elems[0];
+            if (!re_node_set_contains (&init_nodes, dest_idx))
+              {
+                /* Do not ignore a failed merge; report it after
+                   releasing the temporary set.  */
+                err = re_node_set_merge (&init_nodes,
+                                         dfa->eclosures + dest_idx);
+                if (BE (err != REG_NOERROR, 0))
+                  {
+                    re_node_set_free (&init_nodes);
+                    return err;
+                  }
+                /* The set changed, so the scan is restarted (the loop
+                   increment runs before the next iteration).  */
+                i = 0;
+              }
+          }
+      }
+
+  /* It must be the first time to invoke acquire_state.  */
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (BE (dfa->init_state == NULL, 0))
+    {
+      re_node_set_free (&init_nodes);
+      return err;
+    }
+  if (dfa->init_state->has_constraint)
+    {
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                       CONTEXT_WORD);
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
+                                                     CONTEXT_NEWLINE);
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
+                                                         CONTEXT_NEWLINE
+                                                         | CONTEXT_BEGBUF);
+      if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+              || dfa->init_state_begbuf == NULL, 0))
+        {
+          re_node_set_free (&init_nodes);
+          return err;
+        }
+    }
+  else
+    dfa->init_state_word = dfa->init_state_nl
+      = dfa->init_state_begbuf = dfa->init_state;
+
+  re_node_set_free (&init_nodes);
+  return REG_NOERROR;
+}
+
+#ifdef RE_ENABLE_I18N
+/* If it is possible to do searching in single byte encoding instead of UTF-8
+ to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
+ DFA nodes where needed. */
+
+/* Scan every node of DFA; return without changing anything as soon as a
+ node is found that is not on the list handled below. Otherwise adjust
+ the nodes and switch DFA to single-byte searching. */
+static void
+optimize_utf8 (dfa)
+ re_dfa_t *dfa;
+{
+ int node, i, mb_chars = 0, has_period = 0;
+
+ /* First pass: check that every node is acceptable, and note whether
+ any non-ASCII byte or period is present. */
+ for (node = 0; node < dfa->nodes_len; ++node)
+ switch (dfa->nodes[node].type)
+ {
+ case CHARACTER:
+ if (dfa->nodes[node].opr.c >= 0x80)
+ mb_chars = 1;
+ break;
+ case ANCHOR:
+ switch (dfa->nodes[node].opr.idx)
+ {
+ case LINE_FIRST:
+ case LINE_LAST:
+ case BUF_FIRST:
+ case BUF_LAST:
+ break;
+ default:
+ /* Word anchors etc. cannot be handled. */
+ return;
+ }
+ break;
+ case OP_PERIOD:
+ has_period = 1;
+ break;
+ case OP_BACK_REF:
+ case OP_ALT:
+ case END_OF_RE:
+ case OP_DUP_ASTERISK:
+ case OP_DUP_QUESTION:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ break;
+ case SIMPLE_BRACKET:
+ /* Just double check. */
+ /* Give up if the set contains any byte value of 0x80 or above. */
+ for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
+ if (dfa->nodes[node].opr.sbcset[i])
+ return;
+ break;
+ default:
+ return;
+ }
+
+ /* Second pass: clear mb_partial on non-ASCII bytes and turn periods
+ into OP_UTF8_PERIOD. */
+ if (mb_chars || has_period)
+ for (node = 0; node < dfa->nodes_len; ++node)
+ {
+ if (dfa->nodes[node].type == CHARACTER
+ && dfa->nodes[node].opr.c >= 0x80)
+ dfa->nodes[node].mb_partial = 0;
+ else if (dfa->nodes[node].type == OP_PERIOD)
+ dfa->nodes[node].type = OP_UTF8_PERIOD;
+ }
+
+ /* The search can be in single byte locale. */
+ dfa->mb_cur_max = 1;
+ dfa->is_utf8 = 0;
+ dfa->has_mb_node = dfa->nbackref > 0 || has_period;
+}
+#endif
+
+/* Analyze the structure tree, and calculate "first", "next", "edest",
+ "eclosure", and "inveclosure". */
+
+/* Allocate and initialize the per-node arrays of DFA, then compute
+   "first"/"next"/"edest" over the parse tree, the epsilon closures and
+   the inverse epsilon closures.  Returns REG_ESPACE if an allocation
+   fails, otherwise the result of the analysis steps.  */
+static reg_errcode_t
+analyze (dfa)
+     re_dfa_t *dfa;
+{
+  int i;
+  reg_errcode_t ret;
+
+  /* Allocate arrays.  */
+  dfa->nexts = re_malloc (int, dfa->nodes_alloc);
+  dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
+  dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
+          || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
+    {
+      /* The node-set arrays that were obtained are still uninitialized.
+         free_dfa_content frees the node set in every slot of any of
+         these arrays that is non-null, so drop them here rather than
+         let it free garbage pointers.  */
+      re_free (dfa->edests);
+      re_free (dfa->eclosures);
+      re_free (dfa->inveclosures);
+      dfa->edests = dfa->eclosures = dfa->inveclosures = NULL;
+      return REG_ESPACE;
+    }
+  /* Initialize them.  */
+  for (i = 0; i < dfa->nodes_len; ++i)
+    {
+      dfa->nexts[i] = -1;
+      re_node_set_init_empty (dfa->edests + i);
+      re_node_set_init_empty (dfa->eclosures + i);
+      re_node_set_init_empty (dfa->inveclosures + i);
+    }
+
+  ret = analyze_tree (dfa, dfa->str_tree);
+  if (BE (ret == REG_NOERROR, 1))
+    {
+      ret = calc_eclosure (dfa);
+      if (ret == REG_NOERROR)
+        calc_inveclosure (dfa);
+    }
+  return ret;
+}
+
+/* Helper for analyze.
+   Compute "first", "next" and "edest" for NODE when they have not been
+   computed yet, then do the same for its left and right subtrees.
+   Return the first error reported by a subtree, or REG_NOERROR.  */
+
+static reg_errcode_t
+analyze_tree (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  reg_errcode_t status = REG_NOERROR;
+
+  if (node->first == -1)
+    calc_first (dfa, node);
+  if (node->next == -1)
+    calc_next (dfa, node);
+  if (node->eclosure.nelem == 0)
+    calc_epsdest (dfa, node);
+
+  /* Left subtree first; the right one is skipped once an error occurred.  */
+  if (node->left != NULL)
+    status = analyze_tree (dfa, node->left);
+  if (status == REG_NOERROR && node->right != NULL)
+    status = analyze_tree (dfa, node->right);
+
+  return status;
+}
+
+/* Calculate "first" for the node NODE.
+ The node kind is taken from NODE->type, or from the DFA node at
+ NODE->node_idx when NODE->type is 0. For the kinds listed explicitly
+ below (including OP_ALT) "first" is NODE's own node index; for every
+ other kind it is the "first" of the left child, which is computed on
+ demand. The result is stored in NODE->first. */
+static void
+calc_first (dfa, node)
+ re_dfa_t *dfa;
+ bin_tree_t *node;
+{
+ int idx, type;
+ idx = node->node_idx;
+ type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
+
+ switch (type)
+ {
+#ifdef DEBUG
+ case OP_OPEN_BRACKET:
+ case OP_CLOSE_BRACKET:
+ case OP_OPEN_DUP_NUM:
+ case OP_CLOSE_DUP_NUM:
+ case OP_DUP_PLUS:
+ case OP_NON_MATCH_LIST:
+ case OP_OPEN_COLL_ELEM:
+ case OP_CLOSE_COLL_ELEM:
+ case OP_OPEN_EQUIV_CLASS:
+ case OP_CLOSE_EQUIV_CLASS:
+ case OP_OPEN_CHAR_CLASS:
+ case OP_CLOSE_CHAR_CLASS:
+ /* These must not appear here. */
+ assert (0); /* No break follows: control would continue into the cases below. */
+#endif
+ case END_OF_RE:
+ case CHARACTER:
+ case OP_PERIOD:
+ case OP_DUP_ASTERISK:
+ case OP_DUP_QUESTION:
+#ifdef RE_ENABLE_I18N
+ case OP_UTF8_PERIOD:
+ case COMPLEX_BRACKET:
+#endif /* RE_ENABLE_I18N */
+ case SIMPLE_BRACKET:
+ case OP_BACK_REF:
+ case ANCHOR:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ node->first = idx;
+ break;
+ case OP_ALT:
+ node->first = idx;
+ break;
+ /* Not reached: the OP_ALT case above always ends with a break. */
+ default:
+#ifdef DEBUG
+ assert (node->left != NULL);
+#endif
+ if (node->left->first == -1) /* -1 marks a "first" that is not computed yet. */
+ calc_first (dfa, node->left);
+ node->first = node->left->first;
+ break;
+ }
+}
+
+/* Calculate "next" for the node NODE and store it in NODE->next.
+   The value depends on the parent: under OP_DUP_ASTERISK it is the
+   parent's own node index, for the left operand of a CONCAT it is the
+   "first" of the right operand, and in every other case it is inherited
+   from the parent.  A node without parent gets -1.
+   When NODE->type is 0 the value is also recorded in dfa->nexts.  */
+
+static void
+calc_next (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  bin_tree_t *up = node->parent;
+
+  if (up == NULL)
+    node->next = -1;
+  else
+    {
+      int up_idx = up->node_idx;
+      int up_type = (up->type == 0) ? dfa->nodes[up_idx].type : up->type;
+
+      if (up_type == OP_DUP_ASTERISK)
+	node->next = up_idx;
+      else if (up_type == CONCAT && up->left == node)
+	{
+	  /* Left operand: continue with whatever the right operand
+	     starts with.  */
+	  if (up->right->first == -1)
+	    calc_first (dfa, up->right);
+	  node->next = up->right->first;
+	}
+      else
+	{
+	  /* Right operand of a CONCAT, or any other parent kind.  */
+	  if (up->next == -1)
+	    calc_next (dfa, up);
+	  node->next = up->next;
+	}
+    }
+
+  if (node->type == 0)
+    dfa->nexts[node->node_idx] = node->next;
+}
+
+/* Calculate "edest" (the epsilon destinations) for the node NODE and
+   store the set in dfa->edests[NODE->node_idx].  Nothing is done for
+   tree nodes whose own type is not 0.  */
+
+static void
+calc_epsdest (dfa, node)
+     re_dfa_t *dfa;
+     bin_tree_t *node;
+{
+  int slot = node->node_idx;
+
+  if (node->type != 0)
+    return;
+
+  switch (dfa->nodes[slot].type)
+    {
+    case OP_DUP_ASTERISK:
+    case OP_DUP_QUESTION:
+      /* Either enter the operand or skip past it.  */
+      if (node->left->first == -1)
+	calc_first (dfa, node->left);
+      if (node->next == -1)
+	calc_next (dfa, node);
+      re_node_set_init_2 (dfa->edests + slot, node->left->first, node->next);
+      break;
+
+    case OP_ALT:
+      {
+	/* One destination per branch; a missing branch leads straight
+	   to "next".  */
+	int via_left, via_right;
+
+	if (node->left == NULL)
+	  {
+	    if (node->next == -1)
+	      calc_next (dfa, node);
+	    via_left = node->next;
+	  }
+	else
+	  {
+	    if (node->left->first == -1)
+	      calc_first (dfa, node->left);
+	    via_left = node->left->first;
+	  }
+
+	if (node->right == NULL)
+	  {
+	    if (node->next == -1)
+	      calc_next (dfa, node);
+	    via_right = node->next;
+	  }
+	else
+	  {
+	    if (node->right->first == -1)
+	      calc_first (dfa, node->right);
+	    via_right = node->right->first;
+	  }
+
+	re_node_set_init_2 (dfa->edests + slot, via_left, via_right);
+      }
+      break;
+
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+    case OP_BACK_REF:
+      re_node_set_init_1 (dfa->edests + slot, node->next);
+      break;
+
+    default:
+      assert (!IS_EPSILON_NODE (dfa->nodes[slot].type));
+      break;
+    }
+}
+
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+ Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+ to their own constraint.
+ The walk starts at the pair TOP_ORG_NODE / TOP_CLONE_NODE and follows the
+ epsilon destinations of the original node, creating a clone of each
+ destination and recording it in the edests of the current clone.
+ Return REG_NOERROR on success, REG_ESPACE if a node set insertion fails,
+ or the error reported by duplicate_node. */
+
+static reg_errcode_t
+duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
+ init_constraint)
+ re_dfa_t *dfa;
+ int top_org_node, top_clone_node, root_node;
+ unsigned int init_constraint;
+{
+ reg_errcode_t err;
+ int org_node, clone_node, ret;
+ unsigned int constraint = init_constraint;
+ for (org_node = top_org_node, clone_node = top_clone_node;;) /* Left only via break or return. */
+ {
+ int org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transits, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* If the node can't epsilon-transit, don't duplicate the
+ destination; store the original destination as the
+ destination of the clone and stop the walk. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* The node can epsilon-transit and has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ if (dfa->nodes[org_node].type == ANCHOR)
+ {
+ /* If the node has another constraint, append it. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ /* ...but if the node is root_node itself, it means the
+ epsilon closure has a loop; then tie it to the
+ destination of the root_node. */
+ ret = re_node_set_insert (dfa->edests + clone_node,
+ org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
+ }
+ constraint |= dfa->nodes[org_node].opr.ctx_type;
+ }
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* The node can epsilon-transit and has two
+ destinations. E.g. '|', '*', '+', '?'. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ /* Search for a duplicated node which satisfies the constraint. */
+ clone_dest = search_duplicated_node (dfa, org_dest, constraint);
+ if (clone_dest == -1)
+ {
+ /* There is no such duplicated node, create a new one and
+ recurse to duplicate the closure of the first destination. */
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ err = duplicate_node_closure (dfa, org_dest, clone_dest,
+ root_node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ {
+ /* There is a duplicated node which satisfies the constraint,
+ use it to avoid an infinite loop. */
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+
+ org_dest = dfa->edests[org_node].elems[1]; /* The second destination is followed by the loop itself. */
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ org_node = org_dest; /* Step to the destination pair just linked. */
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+}
+
+/* Search for a node which is duplicated from the node ORG_NODE, and
+   satisfies the constraint CONSTRAINT.
+   The scan runs backwards from the last node of DFA and stops at the
+   first node that is not marked as duplicated; index 0 is never examined.
+   Return the index of the matching node, or -1 if there is none.  */
+
+static int
+search_duplicated_node (dfa, org_node, constraint)
+     re_dfa_t *dfa;
+     int org_node;
+     unsigned int constraint;
+{
+  int idx;
+  /* Test the index before using it, so that an empty node array
+     (nodes_len == 0, idx == -1) is never dereferenced.  */
+  for (idx = dfa->nodes_len - 1; idx > 0 && dfa->nodes[idx].duplicated; --idx)
+    {
+      if (org_node == dfa->org_indices[idx]
+	  && constraint == dfa->nodes[idx].constraint)
+	return idx; /* Found.  */
+    }
+  return -1; /* Not found.  */
+}
+
+/* Append a copy of the node ORG_IDX to DFA and give the copy the
+   constraint CONSTRAINT (plus the node's own context type when it is an
+   ANCHOR).  The copy is flagged as duplicated, starts with empty
+   edest/eclosure/inveclosure sets, and remembers ORG_IDX in
+   dfa->org_indices.
+   On success store the index of the copy in *NEW_IDX and return
+   REG_NOERROR; return REG_ESPACE if the node cannot be added.  */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
+     re_dfa_t *dfa;
+     int *new_idx, org_idx;
+     unsigned int constraint;
+{
+  unsigned int merged = constraint;
+  int copy = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
+
+  if (BE (copy == -1, 0))
+    return REG_ESPACE;
+
+  /* An anchor contributes its own context on top of the inherited one.  */
+  if (dfa->nodes[org_idx].type == ANCHOR)
+    merged |= dfa->nodes[org_idx].opr.ctx_type;
+  dfa->nodes[copy].constraint = merged;
+  dfa->nodes[copy].duplicated = 1;
+
+  re_node_set_init_empty (dfa->inveclosures + copy);
+  re_node_set_init_empty (dfa->eclosures + copy);
+  re_node_set_init_empty (dfa->edests + copy);
+
+  /* Store the index of the original node.  */
+  dfa->org_indices[copy] = org_idx;
+  *new_idx = copy;
+  return REG_NOERROR;
+}
+
+/* Build the inverse epsilon closures: for every node SRC and every
+   member DEST of SRC's epsilon closure, add SRC to
+   dfa->inveclosures[DEST].  The result of the insertion is not checked.  */
+static void
+calc_inveclosure (dfa)
+     re_dfa_t *dfa;
+{
+  int from, k;
+
+  for (from = 0; from < dfa->nodes_len; ++from)
+    {
+      re_node_set *closure = dfa->eclosures + from;
+
+      for (k = 0; k < closure->nelem; ++k)
+	re_node_set_insert (dfa->inveclosures + closure->elems[k], from);
+    }
+}
+
+/* Calculate "eclosure" for all the nodes in DFA.
+   The nodes are swept repeatedly; a sweep that leaves some closure
+   still empty triggers another sweep.  Return REG_NOERROR, or the error
+   reported by calc_eclosure_iter.  */
+
+static reg_errcode_t
+calc_eclosure (dfa)
+     re_dfa_t *dfa;
+{
+  int idx, again;
+#ifdef DEBUG
+  assert (dfa->nodes_len > 0);
+#endif
+  do
+    {
+      again = 0;
+      /* dfa->nodes_len is re-read on every step of the sweep.  */
+      for (idx = 0; idx < dfa->nodes_len; ++idx)
+	{
+	  reg_errcode_t err;
+	  re_node_set scratch;
+
+#ifdef DEBUG
+	  assert (dfa->eclosures[idx].nelem != -1);
+#endif
+	  /* Nothing to do for a closure that is already known.  */
+	  if (dfa->eclosures[idx].nelem != 0)
+	    continue;
+
+	  err = calc_eclosure_iter (&scratch, dfa, idx, 1);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+
+	  if (dfa->eclosures[idx].nelem == 0)
+	    {
+	      again = 1;
+	      re_node_set_free (&scratch);
+	    }
+	}
+    }
+  while (again);
+
+  return REG_NOERROR;
+}
+
+/* Calculate epsilon closure of NODE.
+   The closure is NODE itself plus the closures of all its epsilon
+   destinations.  If NODE is an ANCHOR whose first destination has not been
+   duplicated yet, the destinations are first duplicated so that they
+   inherit the anchor's constraint.
+   The resulting set is returned through *NEW_SET.  It is also stored in
+   dfa->eclosures[NODE], unless the result is incomplete (it depends on a
+   closure that is still being computed) and ROOT is zero; in that case
+   dfa->eclosures[NODE] is left empty and the caller owns *NEW_SET.
+   Return REG_NOERROR on success or an error code; on error the partially
+   built set is released.  */
+
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+     re_node_set *new_set;
+     re_dfa_t *dfa;
+     int node, root;
+{
+  reg_errcode_t err;
+  unsigned int constraint;
+  int i, incomplete;
+  re_node_set eclosure;
+  incomplete = 0;
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  /* This indicates that we are calculating this node now.
+     We reference this value to avoid infinite loop.  */
+  dfa->eclosures[node].nelem = -1;
+
+  constraint = ((dfa->nodes[node].type == ANCHOR)
+		? dfa->nodes[node].opr.ctx_type : 0);
+  /* If the current node has constraints, duplicate all nodes.
+     Since they must inherit the constraints.  The destination set is
+     checked for emptiness before its first element is read.  */
+  if (constraint
+      && dfa->edests[node].nelem > 0
+      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+    {
+      err = duplicate_node_closure (dfa, node, node, node, constraint);
+      if (BE (err != REG_NOERROR, 0))
+	goto calc_eclosure_iter_fail;
+    }
+
+  /* Expand each epsilon destination nodes.  */
+  if (IS_EPSILON_NODE(dfa->nodes[node].type))
+    for (i = 0; i < dfa->edests[node].nelem; ++i)
+      {
+	re_node_set eclosure_elem;
+	int edest = dfa->edests[node].elems[i];
+	/* If calculating the epsilon closure of `edest' is in progress,
+	   return intermediate result.  */
+	if (dfa->eclosures[edest].nelem == -1)
+	  {
+	    incomplete = 1;
+	    continue;
+	  }
+	/* If we haven't calculated the epsilon closure of `edest' yet,
+	   calculate now. Otherwise use calculated epsilon closure.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+	    if (BE (err != REG_NOERROR, 0))
+	      goto calc_eclosure_iter_fail;
+	  }
+	else
+	  eclosure_elem = dfa->eclosures[edest];
+	/* Merge the epsilon closure of `edest'.  */
+	err = re_node_set_merge (&eclosure, &eclosure_elem);
+	/* If the epsilon closure of `edest' is incomplete,
+	   the epsilon closure of this node is also incomplete.
+	   In that case ECLOSURE_ELEM is a private copy and must be
+	   released here, whether or not the merge succeeded.  */
+	if (dfa->eclosures[edest].nelem == 0)
+	  {
+	    incomplete = 1;
+	    re_node_set_free (&eclosure_elem);
+	  }
+	if (BE (err != REG_NOERROR, 0))
+	  goto calc_eclosure_iter_fail;
+      }
+
+  /* Epsilon closures include itself.  */
+  if (BE (re_node_set_insert (&eclosure, node) < 0, 0))
+    {
+      err = REG_ESPACE;
+      goto calc_eclosure_iter_fail;
+    }
+  if (incomplete && !root)
+    dfa->eclosures[node].nelem = 0;
+  else
+    dfa->eclosures[node] = eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
+
+ calc_eclosure_iter_fail:
+  /* Nothing references ECLOSURE yet, so it can be dropped safely.  */
+  re_node_set_free (&eclosure);
+  return err;
+}
+
+/* Functions for token which are used in the parser. */
+
+/* Read the next token from INPUT into *RESULT and advance INPUT past it.
+   The token is recognized by peek_token, so this function must not be
+   used inside bracket expressions.  */
+
+static void
+fetch_token (result, input, syntax)
+     re_token_t *result;
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  int consumed = peek_token (result, input, syntax);
+  re_string_skip_bytes (input, consumed);
+}
+
+/* Peek a token from INPUT, and return the length of the token.
+ We must not use this function inside bracket expressions.
+ The token is stored in *TOKEN and classified according to the SYNTAX
+ bits. The return value is 0 at the end of INPUT, 2 for a backslash that
+ is followed by another byte, and 1 in every other case. The only
+ explicit moves of INPUT made here (in the '$' case) are undone before
+ returning. */
+
+static int
+peek_token (token, input, syntax)
+ re_token_t *token;
+ re_string_t *input;
+ reg_syntax_t syntax;
+{
+ unsigned char c;
+
+ if (re_string_eoi (input))
+ {
+ token->type = END_OF_RE;
+ return 0;
+ }
+
+ c = re_string_peek_byte (input, 0);
+ token->opr.c = c;
+
+ token->word_char = 0;
+#ifdef RE_ENABLE_I18N
+ token->mb_partial = 0;
+ if (input->mb_cur_max > 1 &&
+ !re_string_first_byte (input, re_string_cur_idx (input)))
+ {
+ token->type = CHARACTER; /* Not the first byte of a character: never an operator. */
+ token->mb_partial = 1;
+ return 1;
+ }
+#endif
+ if (c == '\\')
+ {
+ unsigned char c2;
+ if (re_string_cur_idx (input) + 1 >= re_string_length (input))
+ {
+ token->type = BACK_SLASH; /* A backslash that is the last byte of the pattern. */
+ return 1;
+ }
+
+ c2 = re_string_peek_byte_case (input, 1);
+ token->opr.c = c2;
+ token->type = CHARACTER; /* Default; the switch below overrides it for recognized escapes. */
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input,
+ re_string_cur_idx (input) + 1);
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+#endif
+ token->word_char = IS_WORD_CHAR (c2) != 0;
+
+ switch (c2)
+ {
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (!(syntax & RE_NO_BK_REFS))
+ {
+ token->type = OP_BACK_REF;
+ token->opr.idx = c2 - '0'; /* Group number 1..9. */
+ }
+ break;
+ case '<':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_FIRST;
+ }
+ break;
+ case '>':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_LAST;
+ }
+ break;
+ case 'b':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = WORD_DELIM;
+ }
+ break;
+ case 'B':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = INSIDE_WORD;
+ }
+ break;
+ case 'w':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_WORD;
+ break;
+ case 'W':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTWORD;
+ break;
+ case 's':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_SPACE;
+ break;
+ case 'S':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTSPACE;
+ break;
+ case '`':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_FIRST;
+ }
+ break;
+ case '\'':
+ if (!(syntax & RE_NO_GNU_OPS))
+ {
+ token->type = ANCHOR;
+ token->opr.ctx_type = BUF_LAST;
+ }
+ break;
+ case '(':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (!(syntax & RE_NO_BK_PARENS))
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ default:
+ break;
+ }
+ return 2; /* The backslash and the byte after it. */
+ }
+
+ token->type = CHARACTER; /* Default for an unescaped byte; refined by the switch below. */
+#ifdef RE_ENABLE_I18N
+ if (input->mb_cur_max > 1)
+ {
+ wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
+ token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
+ }
+ else
+#endif
+ token->word_char = IS_WORD_CHAR (token->opr.c);
+
+ switch (c)
+ {
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ token->type = OP_ALT;
+ break;
+ case '|':
+ if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
+ token->type = OP_ALT;
+ break;
+ case '*':
+ token->type = OP_DUP_ASTERISK;
+ break;
+ case '+':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_PLUS;
+ break;
+ case '?':
+ if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
+ token->type = OP_DUP_QUESTION;
+ break;
+ case '{':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_OPEN_DUP_NUM;
+ break;
+ case '}':
+ if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ token->type = OP_CLOSE_DUP_NUM;
+ break;
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_OPEN_SUBEXP;
+ break;
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ token->type = OP_CLOSE_SUBEXP;
+ break;
+ case '[':
+ token->type = OP_OPEN_BRACKET;
+ break;
+ case '.':
+ token->type = OP_PERIOD;
+ break;
+ case '^':
+ if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
+ re_string_cur_idx (input) != 0)
+ {
+ char prev = re_string_peek_byte (input, -1);
+ if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
+ break; /* Stays an ordinary CHARACTER. */
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_FIRST;
+ break;
+ case '$':
+ if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+ re_string_cur_idx (input) + 1 != re_string_length (input))
+ {
+ re_token_t next;
+ re_string_skip_bytes (input, 1); /* Step over the '$' to look at the following token... */
+ peek_token (&next, input, syntax);
+ re_string_skip_bytes (input, -1); /* ...and step back again. */
+ if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
+ break; /* Stays an ordinary CHARACTER. */
+ }
+ token->type = ANCHOR;
+ token->opr.ctx_type = LINE_LAST;
+ break;
+ default:
+ break;
+ }
+ return 1;
+}
+
+/* Peek a token from INPUT while inside a bracket expression, and return
+   the length of the token.  This function must not be used outside of
+   bracket expressions.
+   Note that for a backslash escape (RE_BACKSLASH_ESCAPE_IN_LISTS) INPUT is
+   advanced past the backslash and the returned length covers only the
+   escaped byte.  */
+
+static int
+peek_token_bracket (token, input, syntax)
+     re_token_t *token;
+     re_string_t *input;
+     reg_syntax_t syntax;
+{
+  unsigned char ch;
+
+  if (re_string_eoi (input))
+    {
+      token->type = END_OF_RE;
+      return 0;
+    }
+  ch = re_string_peek_byte (input, 0);
+  token->opr.c = ch;
+
+#ifdef RE_ENABLE_I18N
+  /* A byte that does not start a character is always literal.  */
+  if (input->mb_cur_max > 1
+      && !re_string_first_byte (input, re_string_cur_idx (input)))
+    {
+      token->type = CHARACTER;
+      return 1;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (ch == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
+      && re_string_cur_idx (input) + 1 < re_string_length (input))
+    {
+      /* In this case, '\' escapes the following byte.  */
+      re_string_skip_bytes (input, 1);
+      token->opr.c = re_string_peek_byte (input, 0);
+      token->type = CHARACTER;
+      return 1;
+    }
+
+  if (ch == '[')
+    {
+      /* '[' is special inside a bracket expression: together with the
+	 next byte it may open a collating element, an equivalence class
+	 or a character class.  */
+      unsigned char follow = 0;
+
+      if (re_string_cur_idx (input) + 1 < re_string_length (input))
+	follow = re_string_peek_byte (input, 1);
+      token->opr.c = follow;
+
+      if (follow == '.')
+	token->type = OP_OPEN_COLL_ELEM;
+      else if (follow == '=')
+	token->type = OP_OPEN_EQUIV_CLASS;
+      else if (follow == ':' && (syntax & RE_CHAR_CLASSES))
+	token->type = OP_OPEN_CHAR_CLASS;
+      else
+	{
+	  /* Just a literal '['.  */
+	  token->type = CHARACTER;
+	  token->opr.c = ch;
+	  return 1;
+	}
+      return 2;
+    }
+
+  if (ch == '-')
+    token->type = OP_CHARSET_RANGE;
+  else if (ch == ']')
+    token->type = OP_CLOSE_BRACKET;
+  else if (ch == '^')
+    token->type = OP_NON_MATCH_LIST;
+  else
+    token->type = CHARACTER;
+  return 1;
+}
+
+/* Functions for parser. */
+
+/* Entry point of the parser.
+   Parse the regular expression REGEXP and return the structure tree.
+   If an error occurs, *ERR is set to the error code and NULL is returned.
+   The tree built from a regular expression <reg_exp> is:
+           CAT
+           / \
+          /   \
+   <reg_exp>  EOR
+
+   CAT means concatenation.
+   EOR means end of regular expression.
+   When <reg_exp> produces no tree, the result is the EOR node alone.  */
+
+static bin_tree_t *
+parse (regexp, preg, syntax, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *body, *end_node, *top;
+  re_token_t tok;
+
+  dfa->syntax = syntax;
+  fetch_token (&tok, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  body = parse_reg_exp (regexp, preg, &tok, syntax, 0, err);
+  if (BE (*err != REG_NOERROR && body == NULL, 0))
+    return NULL;
+
+  /* TOK is the token at which parse_reg_exp stopped.  */
+  end_node = re_dfa_add_tree_node (dfa, NULL, NULL, &tok);
+  top = (body == NULL) ? end_node
+		       : create_tree (dfa, body, end_node, CONCAT, 0);
+  if (BE (end_node == NULL || top == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return top;
+}
+
+/* Parse a sequence of branches separated by `|'.  From the regular
+   expression <branch1>|<branch2> the following tree is built:
+           ALT
+           / \
+          /   \
+   <branch1> <branch2>
+
+   ALT means alternative, which represents the operator `|'.
+   A branch that is empty is represented by a NULL child.
+   Return the tree, or NULL with *ERR set on failure.  */
+
+static bin_tree_t *
+parse_reg_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *result, *rhs = NULL;
+
+  result = parse_branch (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && result == NULL, 0))
+    return NULL;
+
+  while (token->type == OP_ALT)
+    {
+      re_token_t bar = *token;
+      int rhs_is_empty;
+
+      fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+      /* Nothing follows the `|' if the next token already ends this
+	 alternative list.  */
+      rhs_is_empty = (token->type == OP_ALT || token->type == END_OF_RE
+		      || (nest != 0 && token->type == OP_CLOSE_SUBEXP));
+      rhs = NULL;
+      if (!rhs_is_empty)
+	{
+	  rhs = parse_branch (regexp, preg, token, syntax, nest, err);
+	  if (BE (*err != REG_NOERROR && rhs == NULL, 0))
+	    return NULL;
+	}
+
+      result = re_dfa_add_tree_node (dfa, result, rhs, &bar);
+      if (BE (result == NULL, 0))
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      dfa->has_plural_match = 1;
+    }
+  return result;
+}
+
+/* Parse one branch, i.e. a run of expressions up to the next `|', the
+   end of the pattern or (when NEST is non-zero) a closing parenthesis.
+   From the regular expression <exp1><exp2> the following tree is built:
+        CAT
+        / \
+       /   \
+   <exp1> <exp2>
+
+   CAT means concatenation.  Expressions that yield no tree are skipped.
+   Return the tree, or NULL with *ERR set on failure.  */
+
+static bin_tree_t *
+parse_branch (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *acc, *piece;
+
+  acc = parse_expression (regexp, preg, token, syntax, nest, err);
+  if (BE (*err != REG_NOERROR && acc == NULL, 0))
+    return NULL;
+
+  while (!(token->type == OP_ALT || token->type == END_OF_RE
+	   || (nest != 0 && token->type == OP_CLOSE_SUBEXP)))
+    {
+      piece = parse_expression (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && piece == NULL, 0))
+	return NULL;
+
+      /* An expression without a tree contributes nothing.  */
+      if (piece == NULL)
+	continue;
+
+      if (acc == NULL)
+	{
+	  acc = piece;
+	  continue;
+	}
+
+      acc = create_tree (dfa, acc, piece, CONCAT, 0);
+      if (acc == NULL)
+	{
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+    }
+  return acc;
+}
+
+/* This function builds the following tree, from regular expression a*:
+ *
+ |
+ a
+ It parses one atom selected by TOKEN->type and then any duplication
+ operators that follow it. On return TOKEN holds the first token after
+ the expression. The result is the tree of the expression, or NULL;
+ on failure *ERR is set to the error code. NULL is also returned with
+ *ERR untouched when TOKEN is OP_ALT or END_OF_RE.
+*/
+
+static bin_tree_t *
+parse_expression (regexp, preg, token, syntax, nest, err)
+ re_string_t *regexp;
+ regex_t *preg;
+ re_token_t *token;
+ reg_syntax_t syntax;
+ int nest;
+ reg_errcode_t *err;
+{
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ bin_tree_t *tree;
+ switch (token->type)
+ {
+ case CHARACTER:
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ {
+ while (!re_string_eoi (regexp)
+ && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) /* Chain the remaining bytes of this character. */
+ {
+ bin_tree_t *mbc_remain;
+ fetch_token (token, regexp, syntax);
+ mbc_remain = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree, mbc_remain, CONCAT, 0);
+ if (BE (mbc_remain == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ }
+#endif
+ break;
+ case OP_OPEN_SUBEXP:
+ tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_OPEN_BRACKET:
+ tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_BACK_REF:
+ if (BE (preg->re_nsub < token->opr.idx
+ || dfa->subexps[token->opr.idx - 1].end == -1, 0)) /* Group does not exist, or its end is still -1. */
+ {
+ *err = REG_ESUBREG;
+ return NULL;
+ }
+ dfa->used_bkref_map |= 1 << (token->opr.idx - 1);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ ++dfa->nbackref;
+ dfa->has_mb_node = 1;
+ break;
+ case OP_OPEN_DUP_NUM:
+ if (syntax & RE_CONTEXT_INVALID_DUP)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ /* FALLTHROUGH */
+ case OP_DUP_ASTERISK:
+ case OP_DUP_PLUS:
+ case OP_DUP_QUESTION:
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ {
+ fetch_token (token, regexp, syntax); /* Drop the operator and parse what follows it. */
+ return parse_expression (regexp, preg, token, syntax, nest, err);
+ }
+ /* else fall through */
+ case OP_CLOSE_SUBEXP:
+ if ((token->type == OP_CLOSE_SUBEXP) &&
+ !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
+ /* else fall through */
+ case OP_CLOSE_DUP_NUM:
+ /* We treat it as a normal character. */
+
+ /* Every token kind that reaches this point is handled as a normal
+ character. */
+ token->type = CHARACTER;
+ /* mb_partial and word_char bits should be initialized already
+ by peek_token. */
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ break;
+ case ANCHOR:
+ if ((token->opr.ctx_type
+ & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
+ && dfa->word_ops_used == 0)
+ init_word_char (dfa);
+ if (token->opr.ctx_type == WORD_DELIM)
+ {
+ bin_tree_t *tree_first, *tree_last; /* WORD_DELIM is built as the alternation WORD_FIRST | WORD_LAST. */
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->type = OP_ALT;
+ tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token);
+ if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ else
+ {
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ /* We must return here, since ANCHORs can't be followed
+ by repetition operators.
+ eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>",
+ it must not be "<ANCHOR(^)><REPEAT(*)>". */
+ fetch_token (token, regexp, syntax);
+ return tree;
+ case OP_PERIOD:
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (tree == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ if (dfa->mb_cur_max > 1)
+ dfa->has_mb_node = 1;
+ break;
+ case OP_WORD:
+ tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 0, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_NOTWORD:
+ tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_SPACE:
+ tree = build_charclass_op (dfa, regexp->trans, "space", "", 0, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_NOTSPACE:
+ tree = build_charclass_op (dfa, regexp->trans, "space", "", 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_ALT:
+ case END_OF_RE:
+ return NULL; /* No expression here; *err is left as it was. */
+ case BACK_SLASH:
+ *err = REG_EESCAPE;
+ return NULL;
+ default:
+ /* Must not happen? */
+#ifdef DEBUG
+ assert (0);
+#endif
+ return NULL;
+ }
+ fetch_token (token, regexp, syntax);
+
+ while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
+ || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
+ {
+ tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ /* In BRE consecutive duplications are not allowed. */
+ if ((syntax & RE_CONTEXT_INVALID_DUP)
+ && (token->type == OP_DUP_ASTERISK
+ || token->type == OP_OPEN_DUP_NUM))
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
+ dfa->has_plural_match = 1;
+ }
+
+ return tree;
+}
+
+/* Parse a parenthesized subexpression; TOKEN is the opening parenthesis
+   on entry and the closing one on successful return.  From the regular
+   expression (<reg_exp>) the following tree is built:
+         CAT
+        /   \
+   OPEN      CAT
+            /   \
+    <reg_exp>   CLOSE
+
+   (with CLOSE taking the place of the inner CAT when <reg_exp> is empty).
+   The subexpression gets the next free slot of dfa->subexps, which is
+   enlarged when needed.  Return the tree, or NULL with *ERR set.  */
+
+static bin_tree_t *
+parse_sub_exp (regexp, preg, token, syntax, nest, err)
+     re_string_t *regexp;
+     regex_t *preg;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     int nest;
+     reg_errcode_t *err;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  bin_tree_t *inner = NULL, *open_node, *close_node;
+  size_t slot = preg->re_nsub++;
+
+  /* Make room for the new slot, doubling the array if it is full.  */
+  if (BE (dfa->subexps_alloc < preg->re_nsub, 0))
+    {
+      re_subexp_t *grown;
+
+      dfa->subexps_alloc *= 2;
+      grown = re_realloc (dfa->subexps, re_subexp_t, dfa->subexps_alloc);
+      if (BE (grown == NULL, 0))
+	{
+	  dfa->subexps_alloc /= 2;
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      dfa->subexps = grown;
+    }
+
+  /* The end stays -1 until the closing parenthesis has been seen.  */
+  dfa->subexps[slot].start = dfa->nodes_len;
+  dfa->subexps[slot].end = -1;
+
+  open_node = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+  if (BE (open_node == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[open_node->node_idx].opr.idx = slot;
+  fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
+
+  /* The subexpression may be a null string.  */
+  if (token->type != OP_CLOSE_SUBEXP)
+    {
+      inner = parse_reg_exp (regexp, preg, token, syntax, nest, err);
+      if (BE (*err != REG_NOERROR && inner == NULL, 0))
+	return NULL;
+    }
+  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+    {
+      *err = REG_EPAREN;
+      return NULL;
+    }
+
+  close_node = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+  dfa->subexps[slot].end = dfa->nodes_len;
+
+  if (inner != NULL)
+    inner = create_tree (dfa, inner, close_node, CONCAT, 0);
+  else
+    inner = close_node;
+  inner = create_tree (dfa, open_node, inner, CONCAT, 0);
+  if (BE (close_node == NULL || inner == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[close_node->node_idx].opr.idx = slot;
+
+  return inner;
+}
+
+/* Parse a repetition operator ("*", "+", "?" or an interval such as
+   "{1,3}") applied to ELEM and return the tree of the repeated expression.
+
+   TOKEN is the repetition token on entry; on success the next token has
+   been fetched into it.  "<re>{n,m}" is expanded into n concatenated
+   copies of <re> followed by m-n optional copies; an open-ended interval
+   ends with a starred copy instead.
+
+   Return values:
+     - the new tree on success; this is NULL (with *ERR untouched) when
+       ELEM is NULL or the expression is repeated exactly zero times;
+     - ELEM itself when the interval is malformed and SYNTAX has
+       RE_INVALID_INTERVAL_ORD: the input position and TOKEN are rolled
+       back and TOKEN is turned into an ordinary CHARACTER;
+     - NULL with *ERR set to REG_BADBR, REG_EBRACE or REG_ESPACE on error.  */
+
+static bin_tree_t *
+parse_dup_op (elem, regexp, dfa, token, syntax, err)
+     bin_tree_t *elem;
+     re_string_t *regexp;
+     re_dfa_t *dfa;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+  re_token_t rep_token;
+  bin_tree_t *result = NULL;
+  int n, min_rep, max_rep;
+  /* Remembered so that a malformed interval can be re-read as text.  */
+  int saved_idx = re_string_cur_idx (regexp);
+  re_token_t saved_token = *token;
+
+  if (token->type != OP_OPEN_DUP_NUM)
+    {
+      /* "*" is {0,}, "+" is {1,} and "?" is {0,1}; -1 means unbounded.  */
+      min_rep = (token->type == OP_DUP_PLUS) ? 1 : 0;
+      max_rep = (token->type == OP_DUP_QUESTION) ? 1 : -1;
+    }
+  else
+    {
+      max_rep = 0;
+      min_rep = fetch_number (regexp, token, syntax);
+      if (min_rep == -1)
+        {
+          /* No digits were read.  "{,m}" is accepted as "{0,m}";
+             anything else, e.g. "<re>{}", is invalid.  */
+          if (token->type != CHARACTER || token->opr.c != ',')
+            {
+              *err = REG_BADBR;
+              return NULL;
+            }
+          min_rep = 0;
+        }
+
+      if (BE (min_rep != -2, 1))
+        {
+          if (token->type == OP_CLOSE_DUP_NUM)
+            /* "{n}" is treated as "{n,n}".  */
+            max_rep = min_rep;
+          else if (token->type == CHARACTER && token->opr.c == ',')
+            max_rep = fetch_number (regexp, token, syntax);
+          else
+            max_rep = -2;
+        }
+
+      /* -2 in either bound marks an invalid sequence.  */
+      if (BE (min_rep == -2 || max_rep == -2, 0))
+        {
+          if (syntax & RE_INVALID_INTERVAL_ORD)
+            {
+              /* The syntax bit asks for a rollback.  The mb_partial and
+                 word_char bits should be already initialized by
+                 peek_token.  */
+              re_string_set_index (regexp, saved_idx);
+              *token = saved_token;
+              token->type = CHARACTER;
+              return elem;
+            }
+
+          *err = (token->type == END_OF_RE) ? REG_EBRACE : REG_BADBR;
+          return NULL;
+        }
+
+      /* The first number must not be greater than the second.  */
+      if (BE (max_rep != -1 && min_rep > max_rep, 0))
+        {
+          *err = REG_BADBR;
+          return NULL;
+        }
+    }
+
+  if (BE (elem == NULL, 0))
+    /* Treat "<re>{0}*" etc. as "<re>{0}".  */
+    min_rep = max_rep = 0;
+  else if (BE (min_rep > 0, 0))
+    {
+      /* The mandatory part: ELEM followed by min_rep - 1 copies.  */
+      result = elem;
+      n = 2;
+      while (n <= min_rep)
+        {
+          elem = duplicate_tree (elem, dfa);
+          result = create_tree (dfa, result, elem, CONCAT, 0);
+          if (BE (elem == NULL || result == NULL, 0))
+            goto parse_dup_op_espace;
+          ++n;
+        }
+    }
+
+  if (BE (max_rep != min_rep, 1))
+    {
+      if (max_rep == -1)
+        rep_token.type = OP_DUP_ASTERISK;
+      else
+        rep_token.type = OP_DUP_QUESTION;
+
+      /* When a mandatory part exists the modifier needs a copy of its
+         own; otherwise ELEM has not been placed anywhere yet and can be
+         used directly.  */
+      if (BE (min_rep > 0, 0))
+        {
+          elem = duplicate_tree (elem, dfa);
+          if (BE (elem == NULL, 0))
+            goto parse_dup_op_espace;
+        }
+
+      /* This subexpression will be marked as optional, so that
+         empty matches do not touch the registers.  */
+      mark_opt_subexp (elem, dfa);
+
+      /* Wrap it in the modifier and attach it to what we have so far.  */
+      elem = re_dfa_add_tree_node (dfa, elem, NULL, &rep_token);
+      if (BE (min_rep > 0, 0))
+        result = create_tree (dfa, result, elem, CONCAT, 0);
+      else
+        result = elem;
+
+      if (BE (elem == NULL || result == NULL, 0))
+        goto parse_dup_op_espace;
+
+      /* Only runs when max_rep != -1: <re>{0,n} is rewritten as
+         <re>?<re>?<re>?...  The (min_rep + 1)-th copy exists already.  */
+      n = min_rep + 2;
+      while (n <= max_rep)
+        {
+          elem = duplicate_tree (elem, dfa);
+          result = create_tree (dfa, result, elem, CONCAT, 0);
+          if (BE (elem == NULL || result == NULL, 0))
+            goto parse_dup_op_espace;
+          ++n;
+        }
+    }
+
+  fetch_token (token, regexp, syntax);
+  return result;
+
+ parse_dup_op_espace:
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Size of the buffers that hold the name of a collating symbol, an
+ equivalence class or a character class, including the terminating NUL.
+ The value is a guess that is believed to be large enough. */
+#define BRACKET_NAME_BUF_SIZE 32
+
+#ifndef _LIBC
+/* Local function for parse_bracket_exp, used only when _LIBC is not
+   defined.
+   Build the range expression which starts at START_ELEM and ends at
+   END_ELEM.  The results are written to MBCSET and SBCSET: every single
+   byte character that falls inside the range is set in SBCSET and, when
+   MBCSET is not NULL, the wide-character bounds are appended to
+   MBCSET->range_starts / MBCSET->range_ends.
+   RANGE_ALLOC is the allocated size of those two arrays; it is a pointer
+   argument since we may update it.
+
+   Returns REG_NOERROR on success, REG_ERANGE if an end point is an
+   equivalence class or a character class or if the range is reversed,
+   REG_ECOLLATE if an end point is a multi-character collating element or
+   cannot be converted to a wide character, and REG_ESPACE when memory is
+   exhausted.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+     re_charset_t *mbcset;
+     int *range_alloc;
+# else /* not RE_ENABLE_I18N */
+build_range_exp (sbcset, start_elem, end_elem)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     bracket_elem_t *start_elem, *end_elem;
+{
+  unsigned int start_ch, end_ch;
+  /* Equivalence Classes and Character Classes can't be a range start/end.  */
+  if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+          || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+          0))
+    return REG_ERANGE;
+
+  /* We can handle no multi character collating elements without libc
+     support.  */
+  if (BE ((start_elem->type == COLL_SYM
+           && strlen ((char *) start_elem->opr.name) > 1)
+          || (end_elem->type == COLL_SYM
+              && strlen ((char *) end_elem->opr.name) > 1), 0))
+    return REG_ECOLLATE;
+
+  /* Single byte value of each end point; 0 for a multibyte character,
+     whose wide value is used instead below.  */
+  start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
+              : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
+                 : 0));
+  end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
+            : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
+               : 0));
+
+# ifdef RE_ENABLE_I18N
+  {
+    wchar_t wc, start_wc, end_wc;
+    /* Three one-character wide strings, at offsets 0 (range start),
+       2 (candidate character) and 4 (range end), each followed by a
+       terminating L'\0', so that they can be handed to wcscoll.  */
+    wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+
+    start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
+                ? __btowc (start_ch) : start_elem->opr.wch);
+    end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
+              ? __btowc (end_ch) : end_elem->opr.wch);
+    if (start_wc == WEOF || end_wc == WEOF)
+      return REG_ECOLLATE;
+    cmp_buf[0] = start_wc;
+    cmp_buf[4] = end_wc;
+    if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
+      return REG_ERANGE;
+
+    /* Got valid collation sequence values, add them as a new entry.
+       However, for !_LIBC we have no collation elements: if the
+       character set is single byte, the single byte character set
+       that we build below suffices.  parse_bracket_exp passes
+       no MBCSET if dfa->mb_cur_max == 1.  */
+    if (mbcset)
+      {
+        /* Check the space of the arrays.  */
+        if (BE (*range_alloc == mbcset->nranges, 0))
+          {
+            /* There is not enough space, need realloc.  */
+            wchar_t *new_array_start, *new_array_end;
+            int new_nranges;
+
+            /* +1 in case of mbcset->nranges is 0.  */
+            new_nranges = 2 * mbcset->nranges + 1;
+
+            /* Use realloc since mbcset->range_starts and mbcset->range_ends
+               are NULL if *range_alloc == 0.
+               Each array is stored back as soon as it has been grown:
+               after a successful realloc the old pointer must not be
+               used any more, so MBCSET may not be left pointing at it
+               if the second allocation fails.  *RANGE_ALLOC is updated
+               only once both arrays have the new size.  */
+            new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+                                          new_nranges);
+            if (BE (new_array_start == NULL, 0))
+              return REG_ESPACE;
+            mbcset->range_starts = new_array_start;
+
+            new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+                                        new_nranges);
+            if (BE (new_array_end == NULL, 0))
+              return REG_ESPACE;
+            mbcset->range_ends = new_array_end;
+
+            *range_alloc = new_nranges;
+          }
+
+        mbcset->range_starts[mbcset->nranges] = start_wc;
+        mbcset->range_ends[mbcset->nranges++] = end_wc;
+      }
+
+    /* Build the table for single byte characters: a character belongs
+       to the range if it collates between the two end points.  */
+    for (wc = 0; wc < SBC_MAX; ++wc)
+      {
+        cmp_buf[2] = wc;
+        if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+            && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+          bitset_set (sbcset, wc);
+      }
+  }
+# else /* not RE_ENABLE_I18N */
+  {
+    unsigned int ch;
+    if (start_ch > end_ch)
+      return REG_ERANGE;
+    /* Build the table for single byte characters.  */
+    for (ch = 0; ch < SBC_MAX; ++ch)
+      if (start_ch <= ch && ch <= end_ch)
+        bitset_set (sbcset, ch);
+  }
+# endif /* not RE_ENABLE_I18N */
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+#ifndef _LIBC
+/* Helper function for parse_bracket_exp, used only when _LIBC is not
+   defined.
+   Build the collating element which is represented by NAME.  Only names
+   that are exactly one byte long are accepted here: that byte is set in
+   SBCSET and REG_NOERROR is returned.  Any other name yields
+   REG_ECOLLATE.
+   MBCSET and COLL_SYM_ALLOC (the allocated size of mbcset->coll_sym)
+   are accepted for the sake of a uniform signature; this variant does
+   not use them.  */
+
+static reg_errcode_t
+# ifdef RE_ENABLE_I18N
+build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+     re_charset_t *mbcset;
+     int *coll_sym_alloc;
+# else /* not RE_ENABLE_I18N */
+build_collating_symbol (sbcset, name)
+# endif /* not RE_ENABLE_I18N */
+     re_bitset_ptr_t sbcset;
+     const unsigned char *name;
+{
+  /* Reject the empty name and names longer than one byte.  */
+  if (BE (name[0] == '\0' || name[1] != '\0', 0))
+    return REG_ECOLLATE;
+
+  bitset_set (sbcset, name[0]);
+  return REG_NOERROR;
+}
+#endif /* not _LIBC */
+
+/* Parse a bracket expression such as "[abc]", "[a-c]" or "[[.a-a.]]".
+
+   Parsing starts with the token that follows the opening bracket
+   (presumably the caller has already consumed the '[' itself) and stops
+   after the closing ']'.  Single byte members are collected in a bit set;
+   with RE_ENABLE_I18N the remaining members (multibyte characters,
+   collating symbols, equivalence classes, ranges, character classes) are
+   collected in a re_charset_t.
+
+   The returned tree is a SIMPLE_BRACKET node, a COMPLEX_BRACKET node
+   (when no single byte member is set), or an OP_ALT node joining both.
+   On failure NULL is returned and *ERR holds the error code.  */
+
+static bin_tree_t *
+parse_bracket_exp (regexp, dfa, token, syntax, err)
+     re_string_t *regexp;
+     re_dfa_t *dfa;
+     re_token_t *token;
+     reg_syntax_t syntax;
+     reg_errcode_t *err;
+{
+#ifdef _LIBC
+  /* Collation data of the current locale, shared with the local
+     functions below.  All but COLLSEQMB and NRULES are set only when
+     NRULES is not zero.  */
+  const unsigned char *collseqmb;
+  const char *collseqwc;
+  uint32_t nrules;
+  int32_t table_size;
+  const int32_t *symb_table;
+  const unsigned char *extra;
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Seek the collating symbol entry corresponding to NAME.
+     Return the index of the symbol in the SYMB_TABLE.  When NAME is not
+     found the index of the empty slot that ended the search is returned,
+     so callers have to test symb_table[2 * index] against 0.  */
+
+  static inline int32_t
+  __attribute ((always_inline))
+  seek_collating_symbol_entry (name, name_len)
+       const unsigned char *name;
+       size_t name_len;
+    {
+      int32_t hash = elem_hash ((const char *) name, name_len);
+      int32_t elem = hash % table_size;
+      int32_t second = hash % (table_size - 2);
+      while (symb_table[2 * elem] != 0)
+        {
+          /* First compare the hashing value.  */
+          if (symb_table[2 * elem] == hash
+              /* Compare the length of the name.  */
+              && name_len == extra[symb_table[2 * elem + 1]]
+              /* Compare the name.  */
+              && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
+                         name_len) == 0)
+            {
+              /* Yep, this is the entry.  */
+              break;
+            }
+
+          /* Next entry.  */
+          elem += second;
+        }
+      return elem;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Look up the collation sequence value of BR_ELEM.
+     Return the value if succeeded, UINT_MAX otherwise.  */
+
+  static inline unsigned int
+  __attribute ((always_inline))
+  lookup_collation_sequence_value (br_elem)
+       bracket_elem_t *br_elem;
+    {
+      if (br_elem->type == SB_CHAR)
+        {
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            return collseqmb[br_elem->opr.ch];
+          else
+            {
+              wint_t wc = __btowc (br_elem->opr.ch);
+              return __collseq_table_lookup (collseqwc, wc);
+            }
+        }
+      else if (br_elem->type == MB_CHAR)
+        {
+          return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+        }
+      else if (br_elem->type == COLL_SYM)
+        {
+          size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+          if (nrules != 0)
+            {
+              int32_t elem, idx;
+              elem = seek_collating_symbol_entry (br_elem->opr.name,
+                                                  sym_name_len);
+              if (symb_table[2 * elem] != 0)
+                {
+                  /* We found the entry.  */
+                  idx = symb_table[2 * elem + 1];
+                  /* Skip the name of collating element name.  */
+                  idx += 1 + extra[idx];
+                  /* Skip the byte sequence of the collating element.  */
+                  idx += 1 + extra[idx];
+                  /* Adjust for the alignment.  */
+                  idx = (idx + 3) & ~3;
+                  /* Skip the multibyte collation sequence value.  */
+                  idx += sizeof (unsigned int);
+                  /* Skip the wide char sequence of the collating element.  */
+                  idx += sizeof (unsigned int) *
+                    (1 + *(unsigned int *) (extra + idx));
+                  /* Return the collation sequence value.  */
+                  return *(unsigned int *) (extra + idx);
+                }
+              else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
+                {
+                  /* No valid character.  Match it as a single byte
+                     character.  */
+                  return collseqmb[br_elem->opr.name[0]];
+                }
+            }
+          else if (sym_name_len == 1)
+            return collseqmb[br_elem->opr.name[0]];
+        }
+      return UINT_MAX;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the range expression which starts from START_ELEM, and ends
+     at END_ELEM.  The results are written to MBCSET and SBCSET.
+     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+     mbcset->range_ends; it is a pointer argument since we may
+     update it.
+     Returns REG_NOERROR, REG_ERANGE, REG_ECOLLATE or REG_ESPACE.  */
+
+  static inline reg_errcode_t
+  __attribute ((always_inline))
+  build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
+       re_charset_t *mbcset;
+       int *range_alloc;
+       re_bitset_ptr_t sbcset;
+       bracket_elem_t *start_elem, *end_elem;
+    {
+      unsigned int ch;
+      uint32_t start_collseq;
+      uint32_t end_collseq;
+
+      /* Equivalence Classes and Character Classes can't be a range
+         start/end.  */
+      if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
+              || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
+              0))
+        return REG_ERANGE;
+
+      start_collseq = lookup_collation_sequence_value (start_elem);
+      end_collseq = lookup_collation_sequence_value (end_elem);
+      /* Check start/end collation sequence values.  */
+      if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
+        return REG_ECOLLATE;
+      if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
+        return REG_ERANGE;
+
+      /* Got valid collation sequence values, add them as a new entry.
+         However, if we have no collation elements, and the character set
+         is single byte, the single byte character set that we
+         build below suffices.  */
+      if (nrules > 0 || dfa->mb_cur_max > 1)
+        {
+          /* Check the space of the arrays.  */
+          if (BE (*range_alloc == mbcset->nranges, 0))
+            {
+              /* There is not enough space, need realloc.  */
+              uint32_t *new_array_start;
+              uint32_t *new_array_end;
+              int new_nranges;
+
+              /* +1 in case of mbcset->nranges is 0.  */
+              new_nranges = 2 * mbcset->nranges + 1;
+
+              /* Each array is stored back as soon as it has been grown:
+                 after a successful realloc the old pointer must not be
+                 used any more, so MBCSET may not be left pointing at it
+                 if the second allocation fails.  *RANGE_ALLOC is updated
+                 only once both arrays have the new size.  */
+              new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                            new_nranges);
+              if (BE (new_array_start == NULL, 0))
+                return REG_ESPACE;
+              mbcset->range_starts = new_array_start;
+
+              new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                          new_nranges);
+              if (BE (new_array_end == NULL, 0))
+                return REG_ESPACE;
+              mbcset->range_ends = new_array_end;
+
+              *range_alloc = new_nranges;
+            }
+
+          mbcset->range_starts[mbcset->nranges] = start_collseq;
+          mbcset->range_ends[mbcset->nranges++] = end_collseq;
+        }
+
+      /* Build the table for single byte characters.  */
+      for (ch = 0; ch < SBC_MAX; ch++)
+        {
+          uint32_t ch_collseq;
+          /*
+          if (MB_CUR_MAX == 1)
+          */
+          if (nrules == 0)
+            ch_collseq = collseqmb[ch];
+          else
+            ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+          if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+            bitset_set (sbcset, ch);
+        }
+      return REG_NOERROR;
+    }
+
+  /* Local function for parse_bracket_exp used in _LIBC environment.
+     Build the collating element which is represented by NAME.
+     The results are written to MBCSET and SBCSET.
+     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym; it is a
+     pointer argument since we may update it.
+     Returns REG_NOERROR, REG_ECOLLATE or REG_ESPACE.  */
+
+  static inline reg_errcode_t
+  __attribute ((always_inline))
+  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+       re_charset_t *mbcset;
+       int *coll_sym_alloc;
+       re_bitset_ptr_t sbcset;
+       const unsigned char *name;
+    {
+      int32_t elem, idx;
+      size_t name_len = strlen ((const char *) name);
+      if (nrules != 0)
+        {
+          elem = seek_collating_symbol_entry (name, name_len);
+          if (symb_table[2 * elem] != 0)
+            {
+              /* We found the entry.  */
+              idx = symb_table[2 * elem + 1];
+              /* Skip the name of collating element name.  */
+              idx += 1 + extra[idx];
+            }
+          else if (symb_table[2 * elem] == 0 && name_len == 1)
+            {
+              /* No valid character, treat it as a normal
+                 character.  */
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+          else
+            return REG_ECOLLATE;
+
+          /* Got valid collation sequence, add it as a new entry.  */
+          /* Check the space of the arrays.  */
+          if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
+            {
+              /* Not enough, realloc it.  */
+              /* +1 in case of mbcset->ncoll_syms is 0.  */
+              int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+              /* Use realloc since mbcset->coll_syms is NULL
+                 if *alloc == 0.  */
+              int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                                   new_coll_sym_alloc);
+              if (BE (new_coll_syms == NULL, 0))
+                return REG_ESPACE;
+              mbcset->coll_syms = new_coll_syms;
+              *coll_sym_alloc = new_coll_sym_alloc;
+            }
+          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+          return REG_NOERROR;
+        }
+      else
+        {
+          if (BE (name_len != 1, 0))
+            return REG_ECOLLATE;
+          else
+            {
+              bitset_set (sbcset, name[0]);
+              return REG_NOERROR;
+            }
+        }
+    }
+#endif
+
+  re_token_t br_token;
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  /* Allocated sizes of the arrays inside MBCSET.  */
+  int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
+  int equiv_class_alloc = 0, char_class_alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  int non_match = 0;
+  bin_tree_t *work_tree;
+  int token_len;
+  int first_round = 1;
+#ifdef _LIBC
+  collseqmb = (const unsigned char *)
+    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+  nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+  if (nrules)
+    {
+      /*
+      if (MB_CUR_MAX > 1)
+      */
+      collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+      table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
+      symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+                                                  _NL_COLLATE_SYMB_TABLEMB);
+      extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+                                                   _NL_COLLATE_SYMB_EXTRAMB);
+    }
+#endif
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else
+  if (BE (sbcset == NULL, 0))
+#endif /* RE_ENABLE_I18N */
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  token_len = peek_token_bracket (token, regexp, syntax);
+  if (BE (token->type == END_OF_RE, 0))
+    {
+      *err = REG_BADPAT;
+      goto parse_bracket_exp_free_return;
+    }
+  if (token->type == OP_NON_MATCH_LIST)
+    {
+#ifdef RE_ENABLE_I18N
+      mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+      non_match = 1;
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+        bitset_set (sbcset, '\0');
+      re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_BADPAT;
+          goto parse_bracket_exp_free_return;
+        }
+    }
+
+  /* We treat the first ']' as a normal character.  */
+  if (token->type == OP_CLOSE_BRACKET)
+    token->type = CHARACTER;
+
+  /* One iteration per member of the list; the loop ends at the closing
+     bracket.  */
+  while (1)
+    {
+      bracket_elem_t start_elem, end_elem;
+      unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
+      unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
+      reg_errcode_t ret;
+      int token_len2 = 0, is_range_exp = 0;
+      re_token_t token2;
+
+      start_elem.opr.name = start_name_buf;
+      ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
+                                   syntax, first_round);
+      if (BE (ret != REG_NOERROR, 0))
+        {
+          *err = ret;
+          goto parse_bracket_exp_free_return;
+        }
+      first_round = 0;
+
+      /* Get information about the next token.  We need it in any case.  */
+      token_len = peek_token_bracket (token, regexp, syntax);
+
+      /* Do not check for ranges if we know they are not allowed.  */
+      if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
+        {
+          if (BE (token->type == END_OF_RE, 0))
+            {
+              *err = REG_EBRACK;
+              goto parse_bracket_exp_free_return;
+            }
+          if (token->type == OP_CHARSET_RANGE)
+            {
+              re_string_skip_bytes (regexp, token_len); /* Skip '-'.  */
+              token_len2 = peek_token_bracket (&token2, regexp, syntax);
+              if (BE (token2.type == END_OF_RE, 0))
+                {
+                  *err = REG_EBRACK;
+                  goto parse_bracket_exp_free_return;
+                }
+              if (token2.type == OP_CLOSE_BRACKET)
+                {
+                  /* We treat the last '-' as a normal character: step
+                     back over it so that the next round reads it.  */
+                  re_string_skip_bytes (regexp, -token_len);
+                  token->type = CHARACTER;
+                }
+              else
+                is_range_exp = 1;
+            }
+        }
+
+      if (is_range_exp == 1)
+        {
+          end_elem.opr.name = end_name_buf;
+          ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
+                                       dfa, syntax, 1);
+          if (BE (ret != REG_NOERROR, 0))
+            {
+              *err = ret;
+              goto parse_bracket_exp_free_return;
+            }
+
+          token_len = peek_token_bracket (token, regexp, syntax);
+
+#ifdef _LIBC
+          *err = build_range_exp (sbcset, mbcset, &range_alloc,
+                                  &start_elem, &end_elem);
+#else
+# ifdef RE_ENABLE_I18N
+          *err = build_range_exp (sbcset,
+                                  dfa->mb_cur_max > 1 ? mbcset : NULL,
+                                  &range_alloc, &start_elem, &end_elem);
+# else
+          *err = build_range_exp (sbcset, &start_elem, &end_elem);
+# endif
+#endif /* not _LIBC */
+          if (BE (*err != REG_NOERROR, 0))
+            goto parse_bracket_exp_free_return;
+        }
+      else
+        {
+          switch (start_elem.type)
+            {
+            case SB_CHAR:
+              bitset_set (sbcset, start_elem.opr.ch);
+              break;
+#ifdef RE_ENABLE_I18N
+            case MB_CHAR:
+              /* Check whether the array has enough space.  */
+              if (BE (mbchar_alloc == mbcset->nmbchars, 0))
+                {
+                  wchar_t *new_mbchars;
+                  /* Not enough, realloc it.  */
+                  /* +1 in case of mbcset->nmbchars is 0.  */
+                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                  /* Use realloc since array is NULL if *alloc == 0.  */
+                  new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                            mbchar_alloc);
+                  if (BE (new_mbchars == NULL, 0))
+                    goto parse_bracket_exp_espace;
+                  mbcset->mbchars = new_mbchars;
+                }
+              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
+              break;
+#endif /* RE_ENABLE_I18N */
+            case EQUIV_CLASS:
+              *err = build_equiv_class (sbcset,
+#ifdef RE_ENABLE_I18N
+                                        mbcset, &equiv_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                        start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case COLL_SYM:
+              *err = build_collating_symbol (sbcset,
+#ifdef RE_ENABLE_I18N
+                                             mbcset, &coll_sym_alloc,
+#endif /* RE_ENABLE_I18N */
+                                             start_elem.opr.name);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            case CHAR_CLASS:
+              *err = build_charclass (regexp->trans, sbcset,
+#ifdef RE_ENABLE_I18N
+                                      mbcset, &char_class_alloc,
+#endif /* RE_ENABLE_I18N */
+                                      start_elem.opr.name, syntax);
+              if (BE (*err != REG_NOERROR, 0))
+                goto parse_bracket_exp_free_return;
+              break;
+            default:
+              assert (0);
+              break;
+            }
+        }
+      if (BE (token->type == END_OF_RE, 0))
+        {
+          *err = REG_EBRACK;
+          goto parse_bracket_exp_free_return;
+        }
+      if (token->type == OP_CLOSE_BRACKET)
+        break;
+    }
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif /* RE_ENABLE_I18N */
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+  if (BE (work_tree == NULL, 0))
+    goto parse_bracket_exp_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
+      || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
+                                                     || mbcset->non_match)))
+    {
+      re_token_t alt_token;
+      bin_tree_t *mbc_tree;
+      int sbc_idx;
+      /* Build a tree for complex bracket.  */
+      dfa->has_mb_node = 1;
+      for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
+        if (sbcset[sbc_idx])
+          break;
+      /* If there are no bits set in sbcset, there is no point
+         of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
+      if (sbc_idx == BITSET_UINTS)
+        {
+          re_free (sbcset);
+          dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET;
+          dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset;
+          return work_tree;
+        }
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+        goto parse_bracket_exp_espace;
+      /* Then join them by ALT node.  */
+      alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
+      work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
+      if (BE (work_tree == NULL, 0))
+        {
+          /* The ALT node could not be created.  Report the failure
+             instead of handing back NULL with *ERR untouched, which a
+             caller could mistake for an empty expression.
+             NOTE(review): SBCSET and MBCSET were handed to the two nodes
+             created above, so they are presumably released together with
+             DFA->nodes and are deliberately not freed here -- confirm
+             against the DFA tear-down code.  */
+          *err = REG_ESPACE;
+          return NULL;
+        }
+      return work_tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return work_tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return work_tree;
+#endif /* not RE_ENABLE_I18N */
+
+ parse_bracket_exp_espace:
+  *err = REG_ESPACE;
+ parse_bracket_exp_free_return:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  return NULL;
+}
+
+/* Parse one element of a bracket expression and describe it in ELEM.
+
+   TOKEN (TOKEN_LEN bytes long) is the already peeked token the element
+   starts with; it is consumed here.  With RE_ENABLE_I18N a multibyte
+   character at the current position takes precedence over TOKEN and
+   yields an MB_CHAR element.  "[.", "[:" and "[=" are delegated to
+   parse_bracket_symbol; anything else becomes an SB_CHAR element.
+
+   ACCEPT_HYPHEN tells whether a '-' may stand for itself here.  If it
+   may not, a '-' is accepted only right before the closing bracket;
+   otherwise REG_ERANGE is returned.  Returns REG_NOERROR on success.  */
+
+static reg_errcode_t
+parse_bracket_element (elem, regexp, token, token_len, dfa, syntax,
+                       accept_hyphen)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+     int token_len;
+     re_dfa_t *dfa;
+     reg_syntax_t syntax;
+     int accept_hyphen;
+{
+#ifdef RE_ENABLE_I18N
+  int mb_len = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
+  if (mb_len > 1)
+    {
+      /* A multibyte character: record its wide value and step over all
+         of its bytes.  */
+      elem->type = MB_CHAR;
+      elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
+      re_string_skip_bytes (regexp, mb_len);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+
+  re_string_skip_bytes (regexp, token_len); /* Skip a token.  */
+
+  switch (token->type)
+    {
+    case OP_OPEN_COLL_ELEM:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_OPEN_EQUIV_CLASS:
+      return parse_bracket_symbol (elem, regexp, token);
+    default:
+      break;
+    }
+
+  if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
+    {
+      /* A '-' that is not a range indicator is only allowed right
+         before the closing bracket.  Everything else is an error.  */
+      re_token_t next;
+      (void) peek_token_bracket (&next, regexp, syntax);
+      /* The actual error value is not standardized since this whole
+         case is undefined.  But ERANGE makes good sense.  */
+      if (next.type != OP_CLOSE_BRACKET)
+        return REG_ERANGE;
+    }
+
+  elem->opr.ch = token->opr.c;
+  elem->type = SB_CHAR;
+  return REG_NOERROR;
+}
+
+/* Parse a bracket symbol in a bracket expression, i.e. one of
+   [:<character_class>:], [.<collating_element>.] and
+   [=<equivalent_class>=].
+
+   TOKEN is the opening token; its opr.c is the delimiter character.
+   The bytes up to the delimiter followed by ']' are copied, NUL
+   terminated, into ELEM->opr.name, which the caller must have pointed at
+   a buffer of BRACKET_NAME_BUF_SIZE bytes, and ELEM->type is set
+   according to the kind of TOKEN.  Character class names are fetched
+   with re_string_fetch_byte_case, the others with re_string_fetch_byte.
+
+   Returns REG_NOERROR on success, or REG_EBRACK if the input ends first
+   or the name does not fit into the buffer.  */
+
+static reg_errcode_t
+parse_bracket_symbol (elem, regexp, token)
+     bracket_elem_t *elem;
+     re_string_t *regexp;
+     re_token_t *token;
+{
+  const unsigned char delim = token->opr.c;
+  const int is_char_class = (token->type == OP_OPEN_CHAR_CLASS);
+  unsigned char ch;
+  int len;
+
+  if (re_string_eoi(regexp))
+    return REG_EBRACK;
+
+  for (len = 0; len < BRACKET_NAME_BUF_SIZE; ++len)
+    {
+      ch = (is_char_class
+            ? re_string_fetch_byte_case (regexp)
+            : re_string_fetch_byte (regexp));
+      if (re_string_eoi(regexp))
+        return REG_EBRACK;
+      /* The delimiter ends the name only when ']' follows it.  */
+      if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
+        break;
+      elem->opr.name[len] = ch;
+    }
+
+  /* Leaving the loop without a terminator means the name is too long.  */
+  if (len >= BRACKET_NAME_BUF_SIZE)
+    return REG_EBRACK;
+
+  re_string_skip_bytes (regexp, 1);
+  elem->opr.name[len] = '\0';
+
+  if (token->type == OP_OPEN_COLL_ELEM)
+    elem->type = COLL_SYM;
+  else if (token->type == OP_OPEN_EQUIV_CLASS)
+    elem->type = EQUIV_CLASS;
+  else if (token->type == OP_OPEN_CHAR_CLASS)
+    elem->type = CHAR_CLASS;
+
+  return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the equivalence class which is represented by NAME.
+ The results are written to MBCSET and SBCSET.
+ EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes;
+ it is a pointer argument since we may update it.
+
+ Under _LIBC with collation rules in the current locale, every single
+ byte character whose weights equal those of NAME is set in SBCSET and
+ the index found for NAME is appended to mbcset->equiv_classes.
+ Otherwise NAME must be exactly one byte long and only that byte is set
+ in SBCSET.
+
+ Returns REG_NOERROR on success, REG_ECOLLATE if NAME is not a valid
+ element, or REG_ESPACE if mbcset->equiv_classes cannot be grown. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
+ re_charset_t *mbcset;
+ int *equiv_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_equiv_class (sbcset, name)
+#endif /* not RE_ENABLE_I18N */
+ re_bitset_ptr_t sbcset;
+ const unsigned char *name;
+{
+#if defined _LIBC
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra, *cp;
+ unsigned char char_buf[2];
+ int32_t idx1, idx2;
+ unsigned int ch;
+ size_t len;
+ /* This #include defines a local function! */
+ /* NOTE(review): the included text is not visible here; findidx,
+ called below, presumably comes from it and may refer to the locals
+ declared above by name -- confirm before renaming any of them. */
+# include <locale/weight.h>
+ /* Calculate the index for equivalence class. */
+ cp = name;
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ idx1 = findidx (&cp);
+ /* findidx moves CP; if it stopped before the end of NAME, the name is
+ more than one element. */
+ if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+ /* This isn't a valid character. */
+ return REG_ECOLLATE;
+
+ /* Build single byte matching table for this equivalence class. */
+ char_buf[1] = (unsigned char) '\0';
+ /* weights[idx] is used as a count for the bytes that follow it. */
+ len = weights[idx1];
+ for (ch = 0; ch < SBC_MAX; ++ch)
+ {
+ /* Look CH up as a one-byte, NUL terminated string. */
+ char_buf[0] = ch;
+ cp = char_buf;
+ idx2 = findidx (&cp);
+/*
+ idx2 = table[ch];
+*/
+ if (idx2 == 0)
+ /* This isn't a valid character. */
+ continue;
+ if (len == weights[idx2])
+ {
+ /* Same count: compare the bytes after it one by one. CH belongs
+ to the class only if the loop ran past LEN without a mismatch. */
+ int cnt = 0;
+ while (cnt <= len &&
+ weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
+ ++cnt;
+
+ if (cnt > len)
+ bitset_set (sbcset, ch);
+ }
+ }
+ /* Check whether the array has enough space. */
+ if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
+ int32_t,
+ new_equiv_class_alloc);
+ if (BE (new_equiv_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->equiv_classes = new_equiv_classes;
+ *equiv_class_alloc = new_equiv_class_alloc;
+ }
+ mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
+ }
+ else
+#endif /* _LIBC */
+ {
+ /* No collation rules (or no libc support): the class consists of the
+ single byte NAME itself. */
+ if (BE (strlen ((const char *) name) != 1, 0))
+ return REG_ECOLLATE;
+ bitset_set (sbcset, *name);
+ }
+ return REG_NOERROR;
+}
+
+ /* Helper function for parse_bracket_exp.
+ Build the character class which is represented by CLASS_NAME.
+ The results are written to MBCSET and SBCSET.
+ CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes;
+ it is a pointer argument since we may update it.
+ TRANS, if not NULL, is a translation table applied to every member
+ before it is set in SBCSET.
+
+ Returns REG_NOERROR on success, REG_ECTYPE if CLASS_NAME is none of the
+ twelve names handled below, or REG_ESPACE if mbcset->char_classes
+ cannot be grown. */
+
+static reg_errcode_t
+#ifdef RE_ENABLE_I18N
+build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
+ re_charset_t *mbcset;
+ int *char_class_alloc;
+#else /* not RE_ENABLE_I18N */
+build_charclass (trans, sbcset, class_name, syntax)
+#endif /* not RE_ENABLE_I18N */
+ unsigned RE_TRANSLATE_TYPE trans;
+ re_bitset_ptr_t sbcset;
+ const unsigned char *class_name;
+ reg_syntax_t syntax;
+{
+ int i;
+ const char *name = (const char *) class_name;
+
+ /* In case of REG_ICASE "upper" and "lower" match the both of
+ upper and lower cases. */
+ if ((syntax & RE_ICASE)
+ && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
+ name = "alpha";
+
+#ifdef RE_ENABLE_I18N
+ /* Check the space of the arrays. */
+ if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nchar_classes is 0. */
+ int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
+ new_char_class_alloc);
+ if (BE (new_char_classes == NULL, 0))
+ return REG_ESPACE;
+ mbcset->char_classes = new_char_classes;
+ *char_class_alloc = new_char_class_alloc;
+ }
+ /* The class is recorded before NAME is validated; an unknown name is
+ still reported as REG_ECTYPE at the end of the function. */
+ mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
+#endif /* RE_ENABLE_I18N */
+
+/* Set in SBCSET every single byte value for which CTYPE_FUNC is true,
+ after mapping it through TRANS when there is a translation table.
+ Relies on the locals I, TRANS and SBCSET of the enclosing function.
+ The macro is not #undef'd afterwards. */
+#define BUILD_CHARCLASS_LOOP(ctype_func) \
+ for (i = 0; i < SBC_MAX; ++i) \
+ { \
+ if (ctype_func (i)) \
+ { \
+ int ch = trans ? trans[i] : i; \
+ bitset_set (sbcset, ch); \
+ } \
+ }
+
+ /* Dispatch on the class name; each branch expands to one loop over
+ all single byte values. */
+ if (strcmp (name, "alnum") == 0)
+ BUILD_CHARCLASS_LOOP (isalnum)
+ else if (strcmp (name, "cntrl") == 0)
+ BUILD_CHARCLASS_LOOP (iscntrl)
+ else if (strcmp (name, "lower") == 0)
+ BUILD_CHARCLASS_LOOP (islower)
+ else if (strcmp (name, "space") == 0)
+ BUILD_CHARCLASS_LOOP (isspace)
+ else if (strcmp (name, "alpha") == 0)
+ BUILD_CHARCLASS_LOOP (isalpha)
+ else if (strcmp (name, "digit") == 0)
+ BUILD_CHARCLASS_LOOP (isdigit)
+ else if (strcmp (name, "print") == 0)
+ BUILD_CHARCLASS_LOOP (isprint)
+ else if (strcmp (name, "upper") == 0)
+ BUILD_CHARCLASS_LOOP (isupper)
+ else if (strcmp (name, "blank") == 0)
+ BUILD_CHARCLASS_LOOP (isblank)
+ else if (strcmp (name, "graph") == 0)
+ BUILD_CHARCLASS_LOOP (isgraph)
+ else if (strcmp (name, "punct") == 0)
+ BUILD_CHARCLASS_LOOP (ispunct)
+ else if (strcmp (name, "xdigit") == 0)
+ BUILD_CHARCLASS_LOOP (isxdigit)
+ else
+ return REG_ECTYPE;
+
+ return REG_NOERROR;
+}
+
+/* Build the tree for a bracket expression that matches the named
+   character class CLASS_NAME plus every byte of the NUL-terminated
+   string EXTRA (used for operators such as \w, where EXTRA adds '_').
+   TRANS is the translation table handed on to build_charclass.  If
+   NON_MATCH is nonzero the resulting set is complemented.
+
+   Returns the new tree on success.  On failure returns NULL and stores
+   the error code in *ERR.  */
+
+static bin_tree_t *
+build_charclass_op (dfa, trans, class_name, extra, non_match, err)
+     re_dfa_t *dfa;
+     unsigned RE_TRANSLATE_TYPE trans;
+     const unsigned char *class_name;
+     const unsigned char *extra;
+     int non_match;
+     reg_errcode_t *err;
+{
+  re_bitset_ptr_t sbcset;
+#ifdef RE_ENABLE_I18N
+  re_charset_t *mbcset;
+  int alloc = 0;
+#endif /* RE_ENABLE_I18N */
+  reg_errcode_t ret;
+  re_token_t br_token;
+  bin_tree_t *tree;
+
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+#ifdef RE_ENABLE_I18N
+  mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
+#endif /* RE_ENABLE_I18N */
+
+#ifdef RE_ENABLE_I18N
+  if (BE (sbcset == NULL || mbcset == NULL, 0))
+#else /* not RE_ENABLE_I18N */
+  if (BE (sbcset == NULL, 0))
+#endif /* not RE_ENABLE_I18N */
+    {
+#ifdef RE_ENABLE_I18N
+      /* Only one of the two allocations may have failed; release the
+         other one.  mbcset is freshly zeroed and owns nothing yet, so
+         freeing the structure alone is enough.  */
+      re_free (sbcset);
+      re_free (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = REG_ESPACE;
+      return NULL;
+    }
+
+  if (non_match)
+    {
+#ifdef RE_ENABLE_I18N
+      /*
+      if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
+	bitset_set(cset->sbcset, '\0');
+      */
+      mbcset->non_match = 1;
+#endif /* RE_ENABLE_I18N */
+    }
+
+  /* We don't care the syntax in this case.  */
+  ret = build_charclass (trans, sbcset,
+#ifdef RE_ENABLE_I18N
+			 mbcset, &alloc,
+#endif /* RE_ENABLE_I18N */
+			 class_name, 0);
+
+  if (BE (ret != REG_NOERROR, 0))
+    {
+      re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+      free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+      *err = ret;
+      return NULL;
+    }
+  /* \w match '_' also.  */
+  for (; *extra; extra++)
+    bitset_set (sbcset, *extra);
+
+  /* If it is non-matching list.  */
+  if (non_match)
+    bitset_not (sbcset);
+
+#ifdef RE_ENABLE_I18N
+  /* Ensure only single byte characters are set.  */
+  if (dfa->mb_cur_max > 1)
+    bitset_mask (sbcset, dfa->sb_char);
+#endif
+
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+  if (BE (tree == NULL, 0))
+    goto build_word_op_espace;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      re_token_t alt_token;
+      bin_tree_t *mbc_tree;
+      /* Build a tree for complex bracket.  */
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      dfa->has_mb_node = 1;
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto build_word_op_espace;
+      /* Then join them by ALT node.  */
+      alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
+      tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token);
+      /* Check the ALT node itself: mbc_tree is known to be non-NULL
+	 here, so testing it would let a failed allocation return NULL
+	 without an error code.  */
+      if (BE (tree == NULL, 0))
+	{
+	  /* NOTE(review): both sets were already handed to DFA nodes
+	     through br_token, so they are deliberately not freed here;
+	     presumably the DFA teardown releases them -- confirm.  */
+	  *err = REG_ESPACE;
+	  return NULL;
+	}
+      return tree;
+    }
+  else
+    {
+      free_charset (mbcset);
+      return tree;
+    }
+#else /* not RE_ENABLE_I18N */
+  return tree;
+#endif /* not RE_ENABLE_I18N */
+
+  /* NOTE(review): this path can be reached after sbcset was stored in
+     a DFA node; confirm the node cleanup does not free it again.  */
+ build_word_op_espace:
+  re_free (sbcset);
+#ifdef RE_ENABLE_I18N
+  free_charset (mbcset);
+#endif /* RE_ENABLE_I18N */
+  *err = REG_ESPACE;
+  return NULL;
+}
+
+/* Helper for interval expressions such as "a{1,3}".
+   Reads tokens from INPUT (via fetch_token, using SYNTAX) until a ','
+   or the closing token of the interval is seen, accumulating the
+   decimal digits read so far.  TOKEN is left holding the last token
+   fetched.
+   Returns the number read,
+   -1 if the number field is empty, as in "{,1}",
+   -2 if an error occurred: end of the pattern, a non-digit
+   token, or a value above RE_DUP_MAX.  */
+
+static int
+fetch_number (input, token, syntax)
+     re_string_t *input;
+     re_token_t *token;
+     reg_syntax_t syntax;
+{
+  int value = -1;
+  unsigned char ch;
+
+  for (;;)
+    {
+      fetch_token (token, input, syntax);
+      ch = token->opr.c;
+
+      if (BE (token->type == END_OF_RE, 0))
+        return -2;
+      if (token->type == OP_CLOSE_DUP_NUM || ch == ',')
+        return value;
+
+      /* Once an error has been seen it sticks until the field ends.  */
+      if (token->type != CHARACTER || ch < '0' || '9' < ch || value == -2)
+        value = -2;
+      else if (value == -1)
+        value = ch - '0';
+      else
+        value = value * 10 + ch - '0';
+
+      if (value > RE_DUP_MAX)
+        value = -2;
+    }
+}
+
+#ifdef RE_ENABLE_I18N
+/* Release the character set CSET: first every buffer it points to,
+   then the structure itself.  CSET is dereferenced unconditionally,
+   so it must not be NULL.  */
+static void
+free_charset (re_charset_t *cset)
+{
+  re_free (cset->char_classes);
+# ifdef _LIBC
+  re_free (cset->range_ends);
+  re_free (cset->range_starts);
+  re_free (cset->equiv_classes);
+  re_free (cset->coll_syms);
+# endif
+  re_free (cset->mbchars);
+
+  /* The container goes last, once nothing reads its members any more.  */
+  re_free (cset);
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Functions for binary tree operation. */
+
+/* Take the next free tree node from DFA's chunked node storage and
+   initialise it with children LEFT and RIGHT, kind TYPE and node
+   index INDEX.  When the current chunk is exhausted a new one is
+   allocated and pushed on the front of the DFA's chunk list.  Each
+   non-NULL child gets its parent link pointed at the new node.
+   Returns the node, or NULL if a new chunk could not be allocated.  */
+
+static bin_tree_t *
+create_tree (dfa, left, right, type, index)
+     re_dfa_t *dfa;
+     bin_tree_t *left;
+     bin_tree_t *right;
+     re_token_type_t type;
+     int index;
+{
+  bin_tree_t *node;
+
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
+    {
+      /* Current chunk is full: chain a fresh one in front of it.  */
+      bin_tree_storage_t *chunk = re_malloc (bin_tree_storage_t, 1);
+
+      if (chunk == NULL)
+        return NULL;
+      chunk->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = chunk;
+      dfa->str_tree_storage_idx = 0;
+    }
+
+  node = dfa->str_tree_storage->data + dfa->str_tree_storage_idx;
+  dfa->str_tree_storage_idx++;
+
+  node->type = type;
+  node->node_idx = index;
+  node->parent = NULL;
+  node->left = left;
+  node->right = right;
+  node->first = -1;
+  node->next = -1;
+  re_node_set_init_empty (&node->eclosure);
+
+  /* Hook the children up to their new parent.  */
+  if (left != NULL)
+    left->parent = node;
+  if (right != NULL)
+    right->parent = node;
+
+  return node;
+}
+
+/* Add a DFA node for TOKEN and wrap it in a new tree node whose
+   children are LEFT and RIGHT.  Returns the tree node, or NULL if
+   either the DFA node or the tree node could not be created.  */
+
+static bin_tree_t *
+re_dfa_add_tree_node (dfa, left, right, token)
+     re_dfa_t *dfa;
+     bin_tree_t *left;
+     bin_tree_t *right;
+     const re_token_t *token;
+{
+  int node_idx = re_dfa_add_node (dfa, *token, 0);
+
+  /* The tree node is created with type 0 (presumably NON_TYPE --
+     TODO confirm) and refers to the DFA node by its index.  */
+  return (node_idx == -1
+          ? NULL
+          : create_tree (dfa, left, right, 0, node_idx));
+}
+
+/* Mark the tree SRC as an optional subexpression.
+   Nothing is done unless SRC is a CONCAT whose left child is a
+   NON_TYPE tree node referring to an OP_OPEN_SUBEXP DFA node.  In
+   that case the subexpression index of that DFA node is handed to
+   mark_opt_subexp_iter, which walks the whole of SRC.  */
+
+static void
+mark_opt_subexp (src, dfa)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+{
+  int open_idx;
+
+  if (src->type != CONCAT || src->left->type != NON_TYPE)
+    return;
+
+  open_idx = src->left->node_idx;
+  if (dfa->nodes[open_idx].type != OP_OPEN_SUBEXP)
+    return;
+
+  mark_opt_subexp_iter (src, dfa, dfa->nodes[open_idx].opr.idx);
+}
+
+
+/* Recursive tree walker for mark_opt_subexp.
+   Visits SRC and then its left and right subtrees.  Every NON_TYPE
+   tree node whose DFA node is an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP
+   with subexpression index IDX gets its opt_subexp flag set.  */
+
+static void
+mark_opt_subexp_iter (src, dfa, idx)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+     int idx;
+{
+  if (src->type == NON_TYPE)
+    {
+      int n = src->node_idx;
+      int is_subexp_delim = (dfa->nodes[n].type == OP_OPEN_SUBEXP
+                             || dfa->nodes[n].type == OP_CLOSE_SUBEXP);
+
+      /* opr.idx is read only for subexpression delimiters.  */
+      if (is_subexp_delim && dfa->nodes[n].opr.idx == idx)
+        dfa->nodes[n].opt_subexp = 1;
+    }
+
+  if (src->left != NULL)
+    mark_opt_subexp_iter (src->left, dfa, idx);
+  if (src->right != NULL)
+    mark_opt_subexp_iter (src->right, dfa, idx);
+}
+
+
+/* Duplicate the tree rooted at SRC and return the new root.
+   For every NON_TYPE tree node a copy of its DFA node is added to DFA
+   and flagged as `duplicated'; other tree nodes reuse their type as
+   the node index.  Returns NULL if any allocation fails.  */
+
+static bin_tree_t *
+duplicate_tree (src, dfa)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+{
+  bin_tree_t *left = NULL, *right = NULL, *new_tree;
+  int new_node_idx;
+  /* Since node indices must follow the post-order of the tree,
+     we must duplicate the left subtree first.  */
+  if (src->left != NULL)
+    {
+      left = duplicate_tree (src->left, dfa);
+      if (left == NULL)
+        return NULL;
+    }
+
+  /* Secondly, duplicate the right subtree.  */
+  if (src->right != NULL)
+    {
+      right = duplicate_tree (src->right, dfa);
+      if (right == NULL)
+        return NULL;
+    }
+
+  /* At last, duplicate the node itself.  */
+  if (src->type == NON_TYPE)
+    {
+      new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
+      /* Check for failure before using the index: -1 must never be
+         used to subscript dfa->nodes.  */
+      if (BE (new_node_idx == -1, 0))
+        return NULL;
+      dfa->nodes[new_node_idx].duplicated = 1;
+    }
+  else
+    new_node_idx = src->type;
+
+  new_tree = create_tree (dfa, left, right, src->type, new_node_idx);
+  return new_tree;
+}
diff --git a/lib/regex.c b/lib/regex.c
new file mode 100644
index 0000000..7a4f304
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,97 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Make alloca available.  AIX needs a pragma; elsewhere, unless alloca
+   is already a macro, use the GCC builtin, <alloca.h>, or a plain
+   declaration, depending on the compiler and platform.  */
+#ifdef _AIX
+#pragma alloca
+#else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+#  ifdef __GNUC__
+#   define alloca(size) __builtin_alloca (size)
+#  else
+#   if HAVE_ALLOCA_H
+#    include <alloca.h>
+#   else
+#    ifdef __hpux
+void *alloca ();
+#    else
+#     if !defined __OS2__ && !defined WIN32
+char *alloca ();
+#     else
+#      include <malloc.h> /* OS/2 defines alloca in here */
+#     endif
+#    endif
+#   endif
+#  endif
+# endif
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean.
+   When built with _LIBC defined, every public entry point below is
+   renamed to its double-underscore counterpart, so the sources
+   included further down define the __-prefixed functions.  */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+# include "../locale/localeinfo.h"
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+#include <sys/types.h>
+
+/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
+ GNU regex allows. Include it before <regex.h>, which correctly
+ #undefs RE_DUP_MAX and sets it to the right value. */
+#include <limits.h>
+
+#include <regex.h>
+#include "regex_internal.h"
+
+#include "regex_internal.c"
+#include "regcomp.c"
+#include "regexec.c"
+
+/* Binary backward compatibility.
+   Only when built with _LIBC nonzero, and only for the library
+   versions selected by SHLIB_COMPAT, the obsolete variable
+   re_max_failures is still defined; link_warning attaches a
+   message telling users that it will go away.  Nothing in this file
+   reads the variable.  */
+#if _LIBC
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
+link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
+int re_max_failures = 2000;
+# endif
+#endif
diff --git a/lib/regex_.h b/lib/regex_.h
new file mode 100644
index 0000000..e32af35
--- /dev/null
+++ b/lib/regex_.h
@@ -0,0 +1,588 @@
+/* Definitions for data structures and routines for the regular
+ expression library.
+ Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_H
+#define _REGEX_H 1
+
+#include <sys/types.h>
+
+/* Allow the use in C++ code. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+# include <stddef.h>
+#endif
+
+/* The following two types have to be signed and unsigned integer type
+ wide enough to hold a value of a pointer. For most ANSI compilers
+ ptrdiff_t and size_t should be likely OK. Still size of these two
+ types is 2 for Microsoft C. Ugh... */
+typedef long int s_reg_t;
+typedef unsigned long int active_reg_t;
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long int reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, turn on internal regex debugging.
+ If not set, and debugging was on, turn it off.
+ This only works if regex.c is compiled -DDEBUG.
+ We define this bit always, so that all that's needed to turn on
+ debugging is to recompile regex.c; the calling code can always have
+ this bit set, and it won't affect anything in the normal case. */
+#define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, a syntactically invalid interval is treated as
+ a string of ordinary characters. For example, the ERE 'a{1' is
+ treated as 'a\{1'. */
+#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
+
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+ for ^, because it is difficult to scan the regex backwards to find
+ whether ^ should be special. */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
+/* If this bit is set, then \{ cannot be first in an bre or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GNU_AWK \
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
+ & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
+ | RE_CONTEXT_INVALID_OPS ))
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
+ | RE_INTERVALS | RE_NO_GNU_OPS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
+ | RE_INVALID_INTERVAL_ORD)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
+ removed and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
+#endif
+/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */
+#define RE_DUP_MAX (0x7fff)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors (^ and $) also match at newline
+ characters in the string.
+ If not set, then anchors match only at the ends of the string. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+/* Use pmatch[0] to set boundaries for regexec. */
+#define REG_STARTEND (1 << 2)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+#ifdef _XOPEN_SOURCE
+ REG_ENOSYS = -1, /* This will never happen for this implementation. */
+#endif
+
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Invalid collating element. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+#ifndef RE_TRANSLATE_TYPE
+# define RE_TRANSLATE_TYPE char *
+#endif
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long int allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long int used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ RE_TRANSLATE_TYPE translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ It is a structure of arrays: one array of start offsets and one
+ of end offsets. */
+struct re_registers
+{
+ unsigned num_regs; /* Number of registers held. */
+ regoff_t *start; /* Start offsets -- presumably one per register. */
+ regoff_t *end; /* End offsets -- presumably one per register. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+# define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+# define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+# define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+# ifndef _CRAY
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+# endif
+#endif
+
+/* GCC 2.95 and later have "__restrict"; C99 compilers have
+ "restrict", and "configure" may have defined "restrict". */
+#ifndef __restrict
+# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
+# if defined restrict || 199901L <= __STDC_VERSION__
+# define __restrict restrict
+# else
+# define __restrict
+# endif
+# endif
+#endif
+/* gcc 3.1 and up support the [restrict] syntax. */
+#ifndef __restrict_arr
+# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+# define __restrict_arr __restrict
+# else
+# define __restrict_arr
+# endif
+#endif
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
+ const char *__restrict __pattern,
+ int __cflags));
+
+extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
+ const char *__restrict __string, size_t __nmatch,
+ regmatch_t __pmatch[__restrict_arr],
+ int __eflags));
+
+extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
+ char *__errbuf, size_t __errbuf_size));
+
+extern void regfree _RE_ARGS ((regex_t *__preg));
+
+
+#ifdef __cplusplus
+}
+#endif /* C++ */
+
+#endif /* regex.h */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
new file mode 100644
index 0000000..95c68d7
--- /dev/null
+++ b/lib/regex_internal.c
@@ -0,0 +1,1653 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static void re_string_construct_common (const char *str, int len,
+ re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase,
+ const re_dfa_t *dfa) internal_function;
+#ifdef RE_ENABLE_I18N
+static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
+ wint_t *last_wc) internal_function;
+#endif /* RE_ENABLE_I18N */
+static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash) internal_function;
+static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+ unsigned int hash) internal_function;
+static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int hash) internal_function;
+static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context,
+ unsigned int hash) internal_function;
+static unsigned int inline calc_state_hash (const re_node_set *nodes,
+ unsigned int context) internal_function;
+
+/* Functions for string operation. */
+
+/* Set up PSTR to scan the LEN-byte string STR and allocate its working
+   buffers, leaving them empty.  The initial buffer size is INIT_LEN,
+   raised to at least DFA->mb_cur_max and capped at LEN + 1.  It is
+   necessary to call re_string_reconstruct before using the object.
+   Returns REG_NOERROR, or the error reported by
+   re_string_realloc_buffers.  */
+
+static reg_errcode_t
+re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
+     re_string_t *pstr;
+     const char *str;
+     int len, init_len, icase;
+     RE_TRANSLATE_TYPE trans;
+     const re_dfa_t *dfa;
+{
+  reg_errcode_t err;
+  int buf_len;
+
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  /* Room for at least one whole character, but no more than the
+     string itself needs.  */
+  buf_len = init_len;
+  if (buf_len < dfa->mb_cur_max)
+    buf_len = dfa->mb_cur_max;
+  if (len + 1 < buf_len)
+    buf_len = len + 1;
+
+  err = re_string_realloc_buffers (pstr, buf_len);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  pstr->word_char = dfa->word_char;
+  pstr->word_ops_used = dfa->word_ops_used;
+
+  /* Without a private byte buffer, MBS simply aliases the input.  */
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+  /* Only an untransformed single-byte string is usable as it stands.  */
+  if (pstr->mbs_allocated || dfa->mb_cur_max > 1)
+    pstr->valid_len = 0;
+  else
+    pstr->valid_len = len;
+  pstr->valid_raw_len = pstr->valid_len;
+
+  return REG_NOERROR;
+}
+
+/* Zero PSTR, set it up for the LEN-byte string STR, allocate LEN + 1
+   buffer slots when LEN is positive, and fill the buffers before
+   returning.  Which builder runs depends on ICASE, on TRANS and, with
+   RE_ENABLE_I18N, on whether DFA->mb_cur_max exceeds 1.  Returns
+   REG_NOERROR, or the error reported by re_string_realloc_buffers or
+   build_wcs_upper_buffer.  */
+
+static reg_errcode_t
+re_string_construct (pstr, str, len, trans, icase, dfa)
+     re_string_t *pstr;
+     const char *str;
+     int len, icase;
+     RE_TRANSLATE_TYPE trans;
+     const re_dfa_t *dfa;
+{
+  reg_errcode_t err;
+
+  memset (pstr, '\0', sizeof (re_string_t));
+  re_string_construct_common (str, len, pstr, trans, icase, dfa);
+
+  if (len > 0)
+    {
+      err = re_string_realloc_buffers (pstr, len + 1);
+      if (BE (err != REG_NOERROR, 0))
+        return err;
+    }
+  if (!pstr->mbs_allocated)
+    pstr->mbs = (unsigned char *) str;
+
+#ifdef RE_ENABLE_I18N
+  if (dfa->mb_cur_max > 1)
+    {
+      if (!icase)
+        {
+          build_wcs_buffer (pstr);
+          return REG_NOERROR;
+        }
+
+      /* Upper-casing may need more room than was allocated: keep
+         doubling the buffers until the whole string has been consumed
+         or there is clearly space to spare.  */
+      for (;;)
+        {
+          err = build_wcs_upper_buffer (pstr);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+          if (pstr->valid_raw_len >= len
+              || pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
+            return REG_NOERROR;
+          err = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
+          if (BE (err != REG_NOERROR, 0))
+            return err;
+        }
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (icase)
+    build_upper_buffer (pstr);
+  else if (trans != NULL)
+    re_string_translate_buffer (pstr);
+  else
+    {
+      /* Nothing to transform: the whole buffer counts as valid.  */
+      pstr->valid_len = pstr->bufs_len;
+      pstr->valid_raw_len = pstr->bufs_len;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Helper functions for re_string_allocate, and re_string_construct.  */
+
+/* Resize the buffers of PSTR to NEW_BUF_LEN elements: the wide
+   character buffer (and the offsets table, if one exists) when
+   PSTR->MB_CUR_MAX > 1, and the byte buffer when PSTR owns one.
+   Returns REG_ESPACE as soon as one reallocation fails; PSTR->BUFS_LEN
+   is updated only when all of them succeed.  */
+
+static reg_errcode_t
+re_string_realloc_buffers (pstr, new_buf_len)
+     re_string_t *pstr;
+     int new_buf_len;
+{
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1)
+    {
+      wint_t *grown_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
+
+      if (BE (grown_wcs == NULL, 0))
+        return REG_ESPACE;
+      pstr->wcs = grown_wcs;
+
+      if (pstr->offsets != NULL)
+        {
+          int *grown_offsets = re_realloc (pstr->offsets, int, new_buf_len);
+
+          if (BE (grown_offsets == NULL, 0))
+            return REG_ESPACE;
+          pstr->offsets = grown_offsets;
+        }
+    }
+#endif /* RE_ENABLE_I18N */
+
+  if (pstr->mbs_allocated)
+    {
+      unsigned char *grown_mbs = re_realloc (pstr->mbs, unsigned char,
+                                             new_buf_len);
+
+      if (BE (grown_mbs == NULL, 0))
+        return REG_ESPACE;
+      pstr->mbs = grown_mbs;
+    }
+
+  pstr->bufs_len = new_buf_len;
+  return REG_NOERROR;
+}
+
+
+/* Fill in the fields of PSTR that re_string_allocate and
+   re_string_construct have in common: the raw string STR with its
+   length LEN, the translation table TRANS, the case-folding flag, and
+   the locale properties copied from DFA.  MBS_ALLOCATED is set when
+   either TRANS or ICASE is in effect.  Nothing is allocated here.  */
+
+static void
+re_string_construct_common (str, len, pstr, trans, icase, dfa)
+     const char *str;
+     int len;
+     re_string_t *pstr;
+     RE_TRANSLATE_TYPE trans;
+     int icase;
+     const re_dfa_t *dfa;
+{
+  /* The input itself.  */
+  pstr->raw_mbs = (const unsigned char *) str;
+  pstr->raw_len = len;
+  pstr->len = len;
+  pstr->raw_stop = pstr->stop = pstr->len;
+
+  /* How the input is to be transformed.  */
+  pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans;
+  pstr->icase = icase ? 1 : 0;
+  pstr->mbs_allocated = (trans != NULL || icase);
+
+  /* Locale information taken over from the compiled pattern.  */
+  pstr->mb_cur_max = dfa->mb_cur_max;
+  pstr->is_utf8 = dfa->is_utf8;
+  pstr->map_notascii = dfa->map_notascii;
+}
+
+#ifdef RE_ENABLE_I18N
+
+/* Build wide character buffer PSTR->WCS.
+ If the byte sequence of the string is:
+ <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
+ then the wide character buffer will be:
+ <wc1> , WEOF , <wc2> , WEOF , <wc3>
+ WEOF is used for padding; it indicates that the position isn't
+ the first byte of a multibyte character.
+
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN. It stops at the smaller of
+ PSTR->LEN and PSTR->BUFS_LEN, or earlier if mbrtowc reports an
+ incomplete character, and records the index it reached in both
+ PSTR->VALID_LEN and PSTR->VALID_RAW_LEN.
+
+ NOTE(review): when PSTR->TRANS is set, the translated bytes are only
+ placed in the local BUF; nothing in this function stores to PSTR->MBS.
+ Confirm that no caller expects MBS to be filled here. */
+
+static void
+build_wcs_buffer (pstr)
+ re_string_t *pstr;
+{
+#ifdef _LIBC
+ unsigned char buf[pstr->mb_cur_max];
+#else
+ unsigned char buf[64]; /* Assumes mb_cur_max <= 64 -- TODO confirm. */
+#endif
+ mbstate_t prev_st;
+ int byte_idx, end_idx, mbclen, remain_len;
+
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len, whichever is smaller. */
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+
+ remain_len = end_idx - byte_idx;
+ /* Remember the shift state so that a failed conversion can be
+ undone. */
+ prev_st = pstr->cur_state;
+ /* Apply the translation if we need: translate up to mb_cur_max
+ bytes into BUF and convert from there. */
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
+ buf[i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
+ mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ /* MBCLEN is an int; the comparisons with (size_t) -1 and (size_t) -2
+ below rely on the usual arithmetic conversions. */
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't hold the whole character; stop building
+ here and restore the shift state. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* An invalid sequence or a null character: we treat these cases
+ as a singlebyte character taken from the raw string. */
+ mbclen = 1;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ if (BE (pstr->trans != NULL, 0))
+ wc = pstr->trans[wc];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Write the wide character at the position of its first byte. */
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings for the remaining MBCLEN - 1 bytes; REMAIN_LEN is
+ reused here as the index just past the character. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+}
+
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+ but for REG_ICASE: every character for which iswlower is true is
+ replaced by its towupper counterpart, and the resulting bytes are
+ stored in PSTR->MBS as well. When the upper case form occupies a
+ different number of bytes than the original, PSTR->OFFSETS is
+ allocated on demand and PSTR->OFFSETS_NEEDED is set; from then on
+ OFFSETS[i] holds the raw-string index that position i of MBS came
+ from, and PSTR->LEN (and possibly PSTR->STOP) are adjusted by the
+ difference in length.
+ Returns REG_NOERROR, or REG_ESPACE if OFFSETS cannot be allocated. */
+
+static int
+build_wcs_upper_buffer (pstr)
+ re_string_t *pstr;
+{
+ mbstate_t prev_st;
+ int src_idx, byte_idx, end_idx, mbclen, remain_len;
+#ifdef _LIBC
+ unsigned char buf[pstr->mb_cur_max];
+#else
+ unsigned char buf[64]; /* Assumes mb_cur_max <= 64 -- TODO confirm. */
+#endif
+
+ /* BYTE_IDX indexes the converted buffers (MBS/WCS); SRC_IDX, used by
+ the general loop further down, indexes the raw string. */
+ byte_idx = pstr->valid_len;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+#ifdef _LIBC
+ /* The following optimization assumes that the wchar_t encoding is
+ always ISO 10646. It is used only while no translation table is
+ set and no offsets table is needed, so that raw and converted
+ indices coincide. */
+ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
+ {
+ while (byte_idx < end_idx)
+ {
+ wchar_t wc;
+
+ if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
+ && mbsinit (&pstr->cur_state))
+ {
+ /* In case of a singlebyte character. */
+ pstr->mbs[byte_idx]
+ = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
+ /* The next step uses the assumption that wchar_t is encoded
+ with ISO 10646: all ASCII values can be converted like
+ this. */
+ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
+ ++byte_idx;
+ continue;
+ }
+
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrtowc (&wc,
+ ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ + byte_idx), remain_len, &pstr->cur_state);
+ if (BE (mbclen > 0, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ int mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else
+ {
+ /* The upper case form has a different length: continue
+ in the general loop below, which maintains OFFSETS. */
+ src_idx = byte_idx;
+ goto offsets_needed;
+ }
+ }
+ else
+ memcpy (pstr->mbs + byte_idx,
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->mbs[byte_idx] = ch;
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = byte_idx;
+ return REG_NOERROR;
+ }
+ else
+#endif
+ /* General loop: raw and converted positions are tracked separately. */
+ for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
+ {
+ wchar_t wc;
+ const char *p;
+offsets_needed:
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ /* Apply the translation if we need, converting from BUF. */
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i, ch;
+
+ for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
+ {
+ ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
+ buf[i] = pstr->trans[ch];
+ }
+ p = (const char *) buf;
+ }
+ else
+ p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+ mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+ if (BE (mbclen > 0, 1))
+ {
+ wchar_t wcu = wc;
+ if (iswlower (wc))
+ {
+ int mbcdlen;
+
+ wcu = towupper (wc);
+ mbcdlen = wcrtomb (buf, wcu, &prev_st);
+ if (BE (mbclen == mbcdlen, 1))
+ memcpy (pstr->mbs + byte_idx, buf, mbclen);
+ else
+ {
+ int i;
+
+ /* The upper case form does not fit into what is left of
+ the buffers: stop and let the caller enlarge them. */
+ if (byte_idx + mbcdlen > pstr->bufs_len)
+ {
+ pstr->cur_state = prev_st;
+ break;
+ }
+
+ if (pstr->offsets == NULL)
+ {
+ pstr->offsets = re_malloc (int, pstr->bufs_len);
+
+ if (pstr->offsets == NULL)
+ return REG_ESPACE;
+ }
+ if (!pstr->offsets_needed)
+ {
+ /* Up to here converted and raw indices were equal. */
+ for (i = 0; i < byte_idx; ++i)
+ pstr->offsets[i] = i;
+ pstr->offsets_needed = 1;
+ }
+
+ memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
+ pstr->wcs[byte_idx] = wcu;
+ pstr->offsets[byte_idx] = src_idx;
+ /* Map the remaining converted bytes onto the raw bytes of
+ the character, clamping at its last raw byte. */
+ for (i = 1; i < mbcdlen; ++i)
+ {
+ pstr->offsets[byte_idx + i]
+ = src_idx + (i < mbclen ? i : mbclen - 1);
+ pstr->wcs[byte_idx + i] = WEOF;
+ }
+ /* The converted string changed length by MBCDLEN - MBCLEN. */
+ pstr->len += mbcdlen - mbclen;
+ if (pstr->raw_stop > src_idx)
+ pstr->stop += mbcdlen - mbclen;
+ end_idx = (pstr->bufs_len > pstr->len)
+ ? pstr->len : pstr->bufs_len;
+ byte_idx += mbcdlen;
+ src_idx += mbclen;
+ continue;
+ }
+ }
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ {
+ int i;
+ for (i = 0; i < mbclen; ++i)
+ pstr->offsets[byte_idx + i] = src_idx + i;
+ }
+ src_idx += mbclen;
+
+ pstr->wcs[byte_idx++] = wcu;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
+ }
+ else if (mbclen == (size_t) -1 || mbclen == 0)
+ {
+ /* It is an invalid character or '\0'. Just use the byte. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
+
+ if (BE (pstr->trans != NULL, 0))
+ ch = pstr->trans [ch];
+ pstr->mbs[byte_idx] = ch;
+
+ if (BE (pstr->offsets_needed != 0, 0))
+ pstr->offsets[byte_idx] = src_idx;
+ ++src_idx;
+
+ /* And also cast it to wide char. */
+ pstr->wcs[byte_idx++] = (wchar_t) ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
+ }
+ else
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ }
+ pstr->valid_len = byte_idx;
+ pstr->valid_raw_len = src_idx;
+ return REG_NOERROR;
+}
+
+/* Advance over whole characters of the raw string, starting just past
+   what PSTR has already converted, until the raw index is no longer
+   less than NEW_RAW_IDX.  Return that index, and store in *LAST_WC the
+   wide character produced by the last conversion that yielded one
+   (0 if there was none).  Bytes that mbrtowc cannot convert are
+   stepped over one at a time.  */
+
+static int
+re_string_skip_chars (pstr, new_raw_idx, last_wc)
+     re_string_t *pstr;
+     int new_raw_idx;
+     wint_t *last_wc;
+{
+  mbstate_t prev_st;
+  int rawbuf_idx, mbclen;
+  wchar_t wc = 0;
+
+  /* Skip the characters which are not necessary to check.  */
+  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
+       rawbuf_idx < new_raw_idx;)
+    {
+      /* RAWBUF_IDX counts from the very start of the raw string, so the
+         number of bytes left must be measured against PSTR->RAW_LEN.
+         PSTR->LEN is relative to PSTR->RAW_MBS_IDX; using it here would
+         hand mbrtowc a length that is too small, or even negative.  */
+      int remain_len = pstr->raw_len - rawbuf_idx;
+
+      prev_st = pstr->cur_state;
+      mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
+                        remain_len, &pstr->cur_state);
+      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+        {
+          /* We treat these cases as a singlebyte character.  */
+          mbclen = 1;
+          pstr->cur_state = prev_st;
+        }
+      /* Then proceed to the next character.  */
+      rawbuf_idx += mbclen;
+    }
+  *last_wc = (wint_t) wc;
+  return rawbuf_idx;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* Fill PSTR->MBS one byte at a time for REG_ICASE: each raw byte is
+   passed through PSTR->TRANS when that is set and then mapped to upper
+   case if it is a lower case letter.  Work resumes at PSTR->VALID_LEN
+   and ends at the smaller of PSTR->LEN and PSTR->BUFS_LEN.  */
+
+static void
+build_upper_buffer (pstr)
+     re_string_t *pstr;
+{
+  int pos = pstr->valid_len;
+  int limit = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  while (pos < limit)
+    {
+      int c = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+      if (BE (pstr->trans != NULL, 0))
+        c = pstr->trans[c];
+      pstr->mbs[pos] = islower (c) ? toupper (c) : c;
+      ++pos;
+    }
+
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* Fill PSTR->MBS with the raw bytes mapped through PSTR->TRANS, from
+   PSTR->VALID_LEN up to the smaller of PSTR->LEN and PSTR->BUFS_LEN.  */
+
+static void
+re_string_translate_buffer (pstr)
+     re_string_t *pstr;
+{
+  int pos = pstr->valid_len;
+  int limit = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+  while (pos < limit)
+    {
+      int raw_byte = pstr->raw_mbs[pstr->raw_mbs_idx + pos];
+
+      pstr->mbs[pos] = pstr->trans[raw_byte];
+      ++pos;
+    }
+
+  pstr->valid_len = pos;
+  pstr->valid_raw_len = pos;
+}
+
+/* Reposition PSTR so that its buffers start at raw index IDX, then
+   (re)build them: convert to wide characters in case of
+   pstr->mb_cur_max > 1, convert to upper case in case of REG_ICASE,
+   and apply the translation.  EFLAGS supplies REG_NOTBOL for the
+   context at the very start of the string.
+
+   If IDX lies inside the part that is already built, that part is slid
+   to the front of the buffers and reused; otherwise it is discarded
+   and PSTR->TIP_CONTEXT is recomputed from the character in front of
+   IDX.  Moving backwards (IDX below PSTR->RAW_MBS_IDX) first resets
+   PSTR to the beginning of the string.
+
+   Returns REG_NOERROR, or the error from build_wcs_upper_buffer.  */
+
+static reg_errcode_t
+re_string_reconstruct (pstr, idx, eflags)
+     re_string_t *pstr;
+     int idx, eflags;
+{
+  int offset = idx - pstr->raw_mbs_idx;
+  if (BE (offset < 0, 0))
+    {
+      /* Reset buffer.  */
+#ifdef RE_ENABLE_I18N
+      if (pstr->mb_cur_max > 1)
+        memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+#endif /* RE_ENABLE_I18N */
+      pstr->len = pstr->raw_len;
+      pstr->stop = pstr->raw_stop;
+      pstr->valid_len = 0;
+      pstr->raw_mbs_idx = 0;
+      pstr->valid_raw_len = 0;
+      pstr->offsets_needed = 0;
+      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+                           : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+      if (!pstr->mbs_allocated)
+        pstr->mbs = (unsigned char *) pstr->raw_mbs;
+      offset = idx;
+    }
+
+  if (BE (offset != 0, 1))
+    {
+      /* Do some of the characters which were already checked remain?  */
+      if (BE (offset < pstr->valid_raw_len, 1)
+#ifdef RE_ENABLE_I18N
+          /* Handling this would enlarge the code too much.
+             Accept a slowdown in that case.  */
+          && pstr->offsets_needed == 0
+#endif
+         )
+        {
+          /* Yes, move them to the front of the buffer.  */
+          pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
+#ifdef RE_ENABLE_I18N
+          if (BE (pstr->mb_cur_max, 1) > 1)
+            memmove (pstr->wcs, pstr->wcs + offset,
+                     (pstr->valid_len - offset) * sizeof (wint_t));
+#endif /* RE_ENABLE_I18N */
+          if (BE (pstr->mbs_allocated, 0))
+            memmove (pstr->mbs, pstr->mbs + offset,
+                     pstr->valid_len - offset);
+          pstr->valid_len -= offset;
+          pstr->valid_raw_len -= offset;
+#if DEBUG
+          assert (pstr->valid_len > 0);
+#endif
+        }
+      else
+        {
+          /* No, skip all characters until IDX.  */
+#ifdef RE_ENABLE_I18N
+          if (BE (pstr->offsets_needed, 0))
+            {
+              /* Undo the length changes caused by case conversion.  */
+              pstr->len = pstr->raw_len - idx + offset;
+              pstr->stop = pstr->raw_stop - idx + offset;
+              pstr->offsets_needed = 0;
+            }
+#endif
+          pstr->valid_len = 0;
+          pstr->valid_raw_len = 0;
+#ifdef RE_ENABLE_I18N
+          if (pstr->mb_cur_max > 1)
+            {
+              int wcs_idx;
+              wint_t wc = WEOF;
+
+#ifdef _LIBC
+              if (pstr->is_utf8)
+                {
+                  const unsigned char *raw, *p, *q, *end;
+                  int lowest;
+
+                  /* Special case UTF-8.  Multi-byte chars start with any
+                     byte other than 0x80 - 0xbf.  Look back at most
+                     MB_CUR_MAX bytes for such a byte, but never in
+                     front of the first byte of the raw string.  */
+                  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
+                  lowest = pstr->raw_mbs_idx + offset - pstr->mb_cur_max;
+                  end = pstr->raw_mbs + (lowest < 0 ? 0 : lowest);
+                  for (p = raw + offset - 1; p >= end; --p)
+                    if ((*p & 0xc0) != 0x80)
+                      {
+                        mbstate_t cur_state;
+                        wchar_t wc2;
+                        int mlen = raw + pstr->len - p;
+                        unsigned char buf[6];
+
+                        q = p;
+                        if (BE (pstr->trans != NULL, 0))
+                          {
+                            int i = mlen < 6 ? mlen : 6;
+                            while (--i >= 0)
+                              buf[i] = pstr->trans[p[i]];
+                            q = buf;
+                          }
+                        /* XXX Don't use mbrtowc, we know which conversion
+                           to use (UTF-8 -> UCS4).
+                           Convert from Q so that the translated bytes,
+                           when there are any, are the ones decoded.  */
+                        memset (&cur_state, 0, sizeof (cur_state));
+                        mlen = mbrtowc (&wc2, (const char *) q, mlen,
+                                        &cur_state)
+                               - (raw + offset - p);
+                        if (mlen >= 0)
+                          {
+                            /* The character ends at or after IDX; MLEN
+                               is the number of its bytes beyond IDX.  */
+                            memset (&pstr->cur_state, '\0',
+                                    sizeof (mbstate_t));
+                            pstr->valid_len = mlen;
+                            wc = wc2;
+                          }
+                        break;
+                      }
+                }
+#endif
+              if (wc == WEOF)
+                pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+              if (BE (pstr->valid_len, 0))
+                {
+                  /* IDX falls inside a character: mark its trailing
+                     bytes as padding.  */
+                  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+                    pstr->wcs[wcs_idx] = WEOF;
+                  if (pstr->mbs_allocated)
+                    memset (pstr->mbs, 255, pstr->valid_len);
+                }
+              pstr->valid_raw_len = pstr->valid_len;
+              pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
+                                    && IS_WIDE_WORD_CHAR (wc))
+                                   ? CONTEXT_WORD
+                                   : ((IS_WIDE_NEWLINE (wc)
+                                       && pstr->newline_anchor)
+                                      ? CONTEXT_NEWLINE : 0));
+            }
+          else
+#endif /* RE_ENABLE_I18N */
+            {
+              int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
+              if (pstr->trans)
+                c = pstr->trans[c];
+              pstr->tip_context = (bitset_contain (pstr->word_char, c)
+                                   ? CONTEXT_WORD
+                                   : ((IS_NEWLINE (c) && pstr->newline_anchor)
+                                      ? CONTEXT_NEWLINE : 0));
+            }
+        }
+      /* When MBS aliases the raw string it just moves along with it.  */
+      if (!BE (pstr->mbs_allocated, 0))
+        pstr->mbs += offset;
+    }
+  pstr->raw_mbs_idx = idx;
+  pstr->len -= offset;
+  pstr->stop -= offset;
+
+  /* Then build the buffers.  */
+#ifdef RE_ENABLE_I18N
+  if (BE (pstr->mb_cur_max, 1) > 1)
+    {
+      if (pstr->icase)
+        {
+          int ret = build_wcs_upper_buffer (pstr);
+          if (BE (ret != REG_NOERROR, 0))
+            return ret;
+        }
+      else
+        build_wcs_buffer (pstr);
+    }
+  else
+#endif /* RE_ENABLE_I18N */
+  if (BE (pstr->mbs_allocated, 0))
+    {
+      if (pstr->icase)
+        build_upper_buffer (pstr);
+      else if (pstr->trans != NULL)
+        re_string_translate_buffer (pstr);
+    }
+  else
+    pstr->valid_len = pstr->len;
+
+  pstr->cur_idx = 0;
+  return REG_NOERROR;
+}
+
+/* Return the byte IDX positions past the current index of PSTR without
+   advancing.  The byte comes from the raw string, bypassing
+   translation and case conversion, except where only the converted
+   buffer gives a usable answer: bytes that belong to a multibyte
+   character and, when an offsets table is in use, non-ASCII raw
+   bytes.  */
+
+static unsigned char
+re_string_peek_byte_case (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  int raw_off;
+
+  /* Handle the common (easiest) case first: nothing was converted.  */
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_peek_byte (pstr, idx);
+
+  raw_off = pstr->cur_idx + idx;
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->mb_cur_max > 1
+      && ! re_string_is_single_byte_char (pstr, raw_off))
+    return re_string_peek_byte (pstr, idx);
+
+  if (pstr->offsets_needed)
+    {
+      int raw_byte;
+
+      raw_off = pstr->offsets[raw_off];
+      raw_byte = pstr->raw_mbs[pstr->raw_mbs_idx + raw_off];
+
+      /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
+         this function returns CAPITAL LETTER I instead of first byte of
+         DOTLESS SMALL LETTER I.  The latter would confuse the parser,
+         since peek_byte_case doesn't advance cur_idx in any way.  */
+      if (!isascii (raw_byte))
+        return re_string_peek_byte (pstr, idx);
+      return raw_byte;
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + raw_off];
+}
+
+/* Return the byte at the current index of PSTR and advance past it.
+   The byte comes from the raw string, bypassing translation and case
+   conversion, unless an offsets table is in use and the position is
+   not the first byte of a character or the raw byte is not ASCII; in
+   those cases the converted buffer is read instead.  */
+
+static unsigned char
+re_string_fetch_byte_case (pstr)
+     re_string_t *pstr;
+{
+  if (BE (!pstr->mbs_allocated, 1))
+    return re_string_fetch_byte (pstr);
+
+#ifdef RE_ENABLE_I18N
+  if (pstr->offsets_needed)
+    {
+      const int at = pstr->cur_idx;
+
+      /* For tr_TR.UTF-8 [[:islower:]] there is
+         [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
+         in that case the whole multi-byte character and return
+         the original letter.  On the other side, with
+         [[: DOTLESS SMALL LETTER I return [[:I, as doing
+         anything else would complicate things too much.  */
+      if (re_string_first_byte (pstr, at))
+        {
+          int raw_byte
+            = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->offsets[at]];
+
+          if (isascii (raw_byte))
+            {
+              re_string_skip_bytes (pstr, re_string_char_size_at (pstr, at));
+              return raw_byte;
+            }
+        }
+      return re_string_fetch_byte (pstr);
+    }
+#endif
+
+  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
+}
+
+/* Release the buffers owned by PSTR.  PSTR->MBS is freed only when
+   PSTR->MBS_ALLOCATED is set; PSTR itself is not freed.  */
+
+static void
+re_string_destruct (pstr)
+     re_string_t *pstr;
+{
+  if (pstr->mbs_allocated)
+    re_free (pstr->mbs);
+#ifdef RE_ENABLE_I18N
+  re_free (pstr->offsets);
+  re_free (pstr->wcs);
+#endif /* RE_ENABLE_I18N */
+}
+
+/* Return the context at IDX in INPUT, as a combination of CONTEXT_*
+   flags.  A negative IDX yields INPUT->TIP_CONTEXT; IDX equal to
+   INPUT->LEN yields the end-of-buffer context, which includes
+   CONTEXT_NEWLINE unless EFLAGS has REG_NOTEOL.  Otherwise the result
+   depends on whether the character at IDX is a word character or a
+   newline (the latter only counts when INPUT->NEWLINE_ANCHOR is set).  */
+
+static unsigned int
+re_string_context_at (input, idx, eflags)
+     const re_string_t *input;
+     int idx, eflags;
+{
+  /* In this case, we use the value stored in input->tip_context,
+     since we can't know the character in input->mbs[-1] here.  */
+  if (BE (idx < 0, 0))
+    return input->tip_context;
+
+  if (BE (idx == input->len, 0))
+    {
+      if (eflags & REG_NOTEOL)
+        return CONTEXT_ENDBUF;
+      return CONTEXT_NEWLINE | CONTEXT_ENDBUF;
+    }
+
+#ifdef RE_ENABLE_I18N
+  if (BE (input->mb_cur_max, 1) > 1)
+    {
+      wint_t wc;
+      int pos;
+
+      /* Walk back over padding to the first byte of the character.  */
+      for (pos = idx; input->wcs[pos] == WEOF; )
+        {
+#ifdef DEBUG
+          /* It must not happen.  */
+          assert (pos >= 0);
+#endif
+          if (--pos < 0)
+            return input->tip_context;
+        }
+
+      wc = input->wcs[pos];
+      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
+        return CONTEXT_WORD;
+      if (IS_WIDE_NEWLINE (wc) && input->newline_anchor)
+        return CONTEXT_NEWLINE;
+      return 0;
+    }
+#endif
+
+  {
+    int c = re_string_byte_at (input, idx);
+
+    if (bitset_contain (input->word_char, c))
+      return CONTEXT_WORD;
+    if (IS_NEWLINE (c) && input->newline_anchor)
+      return CONTEXT_NEWLINE;
+    return 0;
+  }
+}
+
+/* Functions for set operation. */
+
+/* Make SET an empty set with room for SIZE elements.  Returns
+   REG_NOERROR on success.  If the allocation fails, returns REG_ESPACE
+   and leaves SET with zero capacity, so that it never advertises room
+   which its (null) element array does not have.  */
+
+static reg_errcode_t
+re_node_set_alloc (set, size)
+     re_node_set *set;
+     int size;
+{
+  set->nelem = 0;
+  set->elems = re_malloc (int, size);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Same convention as re_node_set_init_1: a failed set is empty
+         and owns no storage.  */
+      set->alloc = 0;
+      return REG_ESPACE;
+    }
+  set->alloc = size;
+  return REG_NOERROR;
+}
+
+/* Initialize SET as the one-element set { ELEM }.  Returns REG_NOERROR
+   on success; on allocation failure returns REG_ESPACE with SET's
+   capacity and element count both zero.  */
+
+static reg_errcode_t
+re_node_set_init_1 (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  set->elems = re_malloc (int, 1);
+  if (BE (set->elems == NULL, 0))
+    {
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  set->elems[0] = elem;
+  set->nelem = 1;
+  set->alloc = 1;
+  return REG_NOERROR;
+}
+
+/* Initialize SET with the elements ELEM1 and ELEM2, stored in
+   ascending order; if they are equal the set gets a single element.
+   Room for two elements is allocated either way.  Returns REG_NOERROR
+   on success; on allocation failure returns REG_ESPACE with SET's
+   capacity and element count both zero, as re_node_set_init_1 does.  */
+
+static reg_errcode_t
+re_node_set_init_2 (set, elem1, elem2)
+     re_node_set *set;
+     int elem1, elem2;
+{
+  set->elems = re_malloc (int, 2);
+  if (BE (set->elems == NULL, 0))
+    {
+      /* Do not leave a capacity of 2 (and an unset element count)
+         behind when there is no array to back it.  */
+      set->alloc = set->nelem = 0;
+      return REG_ESPACE;
+    }
+  set->alloc = 2;
+
+  if (elem1 == elem2)
+    {
+      set->nelem = 1;
+      set->elems[0] = elem1;
+    }
+  else
+    {
+      set->nelem = 2;
+      if (elem1 < elem2)
+        {
+          set->elems[0] = elem1;
+          set->elems[1] = elem2;
+        }
+      else
+        {
+          set->elems[0] = elem2;
+          set->elems[1] = elem1;
+        }
+    }
+  return REG_NOERROR;
+}
+
+/* Initialize DEST as a copy of SRC, with capacity for exactly
+   SRC->nelem elements.  A SRC without elements yields an empty DEST.
+   Returns REG_NOERROR on success; on allocation failure returns
+   REG_ESPACE with DEST's capacity and element count both zero.  */
+
+static reg_errcode_t
+re_node_set_init_copy (dest, src)
+     re_node_set *dest;
+     const re_node_set *src;
+{
+  dest->nelem = src->nelem;
+  if (src->nelem <= 0)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+
+  dest->alloc = dest->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    {
+      dest->alloc = dest->nelem = 0;
+      return REG_ESPACE;
+    }
+
+  memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+  return REG_NOERROR;
+}
+
+/* Calculate the intersection of the sets SRC1 and SRC2 and merge it into
+ DEST. All three sets are scanned from their last element downwards,
+ which relies on their elements being in ascending order.
+ Returns REG_NOERROR on success (also when SRC1 or SRC2 is empty, in
+ which case DEST is not touched) or REG_ESPACE if DEST cannot be
+ enlarged.
+ The tail of DEST's array is used as scratch space: the new elements
+ are first collected there, from the top downwards, and then merged
+ into their final positions.
+ Note: We assume dest->elems is NULL, when dest->alloc is 0. */
+
+static reg_errcode_t
+re_node_set_add_intersect (dest, src1, src2)
+ re_node_set *dest;
+ const re_node_set *src1, *src2;
+{
+ int i1, i2, is, id, delta, sbase;
+ if (src1->nelem == 0 || src2->nelem == 0)
+ return REG_NOERROR;
+
+ /* We need dest->nelem + 2 * elems_in_intersection; this is a
+ conservative estimate. */
+ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
+ {
+ int new_alloc = src1->nelem + src2->nelem + dest->alloc;
+ int *new_elems = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_elems == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_elems;
+ dest->alloc = new_alloc;
+ }
+
+ /* Find the items in the intersection of SRC1 and SRC2, and copy
+ into the top of DEST those that are not already in DEST itself.
+ SBASE is the lowest scratch slot filled so far. */
+ sbase = dest->nelem + src1->nelem + src2->nelem;
+ i1 = src1->nelem - 1;
+ i2 = src2->nelem - 1;
+ id = dest->nelem - 1;
+ for (;;)
+ {
+ if (src1->elems[i1] == src2->elems[i2])
+ {
+ /* Try to find the item in DEST. Maybe we could binary search? */
+ while (id >= 0 && dest->elems[id] > src1->elems[i1])
+ --id;
+
+ if (id < 0 || dest->elems[id] != src1->elems[i1])
+ dest->elems[--sbase] = src1->elems[i1];
+
+ if (--i1 < 0 || --i2 < 0)
+ break;
+ }
+
+ /* Lower the highest of the two items. */
+ else if (src1->elems[i1] < src2->elems[i2])
+ {
+ if (--i2 < 0)
+ break;
+ }
+ else
+ {
+ if (--i1 < 0)
+ break;
+ }
+ }
+
+ /* ID walks DEST's old elements from the top, IS walks the scratch
+ elements from the top, and DELTA is how many scratch elements
+ still have to be placed. */
+ id = dest->nelem - 1;
+ is = dest->nelem + src1->nelem + src2->nelem - 1;
+ delta = is - sbase + 1;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place; this is more or
+ less the same loop that is in re_node_set_merge. */
+ dest->nelem += delta;
+ if (delta > 0 && id >= 0)
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ break;
+ }
+ }
+
+ /* Copy remaining SRC elements: the DELTA scratch elements left over
+ go to the very front of DEST. */
+ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
+
+ return REG_NOERROR;
+}
+
+/* Initialize DEST with the union of the sets SRC1 and SRC2, either of
+   which may be NULL or empty.  An element found in both sources is
+   stored once.  Returns REG_NOERROR on success, otherwise an error
+   code (REG_ESPACE when DEST's array cannot be allocated).  */
+
+static reg_errcode_t
+re_node_set_init_union (dest, src1, src2)
+     re_node_set *dest;
+     const re_node_set *src1, *src2;
+{
+  int pos1 = 0, pos2 = 0, out = 0;
+  int have1 = (src1 != NULL && src1->nelem > 0);
+  int have2 = (src2 != NULL && src2->nelem > 0);
+
+  /* With at most one non-empty source there is nothing to merge.  */
+  if (!have1 && !have2)
+    {
+      re_node_set_init_empty (dest);
+      return REG_NOERROR;
+    }
+  if (!have2)
+    return re_node_set_init_copy (dest, src1);
+  if (!have1)
+    return re_node_set_init_copy (dest, src2);
+
+  dest->alloc = src1->nelem + src2->nelem;
+  dest->elems = re_malloc (int, dest->alloc);
+  if (BE (dest->elems == NULL, 0))
+    return REG_ESPACE;
+
+  /* Merge while both sources still have elements, always taking the
+     smaller head and stepping over a duplicate in SRC2.  */
+  while (pos1 < src1->nelem && pos2 < src2->nelem)
+    {
+      int e1 = src1->elems[pos1];
+      int e2 = src2->elems[pos2];
+
+      if (e1 > e2)
+        {
+          dest->elems[out++] = e2;
+          ++pos2;
+        }
+      else
+        {
+          if (e1 == e2)
+            ++pos2;
+          dest->elems[out++] = e1;
+          ++pos1;
+        }
+    }
+
+  /* At most one source has elements left; append them.  */
+  while (pos1 < src1->nelem)
+    dest->elems[out++] = src1->elems[pos1++];
+  while (pos2 < src2->nelem)
+    dest->elems[out++] = src2->elems[pos2++];
+
+  dest->nelem = out;
+  return REG_NOERROR;
+}
+
+/* Calculate the union of the sets DEST and SRC and store it in DEST.
+ Both sets are scanned from their last element downwards, which relies
+ on their elements being in ascending order.
+ Returns REG_NOERROR on success (also when SRC is NULL or empty, in
+ which case DEST is not touched) or REG_ESPACE if DEST cannot be
+ enlarged.
+ The tail of DEST's array is used as scratch space: the elements of SRC
+ that DEST lacks are first collected there and then merged into their
+ final positions. */
+
+static reg_errcode_t
+re_node_set_merge (dest, src)
+ re_node_set *dest;
+ const re_node_set *src;
+{
+ int is, id, sbase, delta;
+ if (src == NULL || src->nelem == 0)
+ return REG_NOERROR;
+ /* Room is needed for DEST's elements plus a scratch area and the
+ final positions of up to SRC->nelem new elements. */
+ if (dest->alloc < 2 * src->nelem + dest->nelem)
+ {
+ int new_alloc = 2 * (src->nelem + dest->alloc);
+ int *new_buffer = re_realloc (dest->elems, int, new_alloc);
+ if (BE (new_buffer == NULL, 0))
+ return REG_ESPACE;
+ dest->elems = new_buffer;
+ dest->alloc = new_alloc;
+ }
+
+ /* An empty DEST simply becomes a copy of SRC. */
+ if (BE (dest->nelem == 0, 0))
+ {
+ dest->nelem = src->nelem;
+ memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
+ return REG_NOERROR;
+ }
+
+ /* Copy into the top of DEST the items of SRC that are not
+ found in DEST. Maybe we could binary search in DEST?
+ SBASE is the lowest scratch slot filled so far. */
+ for (sbase = dest->nelem + 2 * src->nelem,
+ is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
+ {
+ if (dest->elems[id] == src->elems[is])
+ is--, id--;
+ else if (dest->elems[id] < src->elems[is])
+ dest->elems[--sbase] = src->elems[is--];
+ else /* if (dest->elems[id] > src->elems[is]) */
+ --id;
+ }
+
+ if (is >= 0)
+ {
+ /* If DEST is exhausted, the remaining items of SRC must be unique. */
+ sbase -= is + 1;
+ memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
+ }
+
+ /* ID walks DEST's old elements from the top, IS walks the scratch
+ elements from the top, and DELTA is how many scratch elements
+ still have to be placed. */
+ id = dest->nelem - 1;
+ is = dest->nelem + 2 * src->nelem - 1;
+ delta = is - sbase + 1;
+ if (delta == 0)
+ return REG_NOERROR;
+
+ /* Now copy. When DELTA becomes zero, the remaining
+ DEST elements are already in place. */
+ dest->nelem += delta;
+ for (;;)
+ {
+ if (dest->elems[is] > dest->elems[id])
+ {
+ /* Copy from the top. */
+ dest->elems[id + delta--] = dest->elems[is--];
+ if (delta == 0)
+ break;
+ }
+ else
+ {
+ /* Slide from the bottom. */
+ dest->elems[id + delta] = dest->elems[id];
+ if (--id < 0)
+ {
+ /* Copy remaining SRC elements. */
+ memcpy (dest->elems, dest->elems + sbase,
+ delta * sizeof (int));
+ break;
+ }
+ }
+ }
+
+ return REG_NOERROR;
+}
+
+/* Insert the new element ELEM into the re_node_set SET, keeping the
+   elements in ascending order.
+   SET should not already have ELEM.
+   Return -1 if memory cannot be allocated, return 1 otherwise.  When
+   enlarging the array fails, SET keeps its previous elements and its
+   previous capacity.  */
+
+static int
+re_node_set_insert (set, elem)
+     re_node_set *set;
+     int elem;
+{
+  int idx;
+
+  /* In case the set has no storage yet.  */
+  if (set->alloc == 0)
+    {
+      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (BE (set->nelem, 0) == 0)
+    {
+      /* We already guaranteed above that set->alloc != 0.  */
+      set->elems[0] = elem;
+      ++set->nelem;
+      return 1;
+    }
+
+  /* Realloc if we need.  The new capacity is recorded only once the
+     reallocation has succeeded; otherwise a failure would leave
+     SET->ALLOC claiming twice the room the array really has.  */
+  if (set->alloc == set->nelem)
+    {
+      int new_alloc = set->alloc * 2;
+      int *new_array = re_realloc (set->elems, int, new_alloc);
+      if (BE (new_array == NULL, 0))
+        return -1;
+      set->elems = new_array;
+      set->alloc = new_alloc;
+    }
+
+  /* Move the elements which follow the new element.  Test the
+     first element separately to skip a check in the inner loop.  */
+  if (elem < set->elems[0])
+    {
+      for (idx = set->nelem; idx > 0; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+  else
+    {
+      /* ELEMS[0] <= ELEM acts as a sentinel, so IDX stays positive.  */
+      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
+        set->elems[idx] = set->elems[idx - 1];
+    }
+
+  /* Insert the new element.  */
+  set->elems[idx] = elem;
+  ++set->nelem;
+  return 1;
+}
+
+/* Compare two node sets SET1 and SET2.
+   Return 1 if neither is NULL and both hold the same elements in the
+   same order, return 0 otherwise.  */
+
+static int
+re_node_set_compare (set1, set2)
+     const re_node_set *set1, *set2;
+{
+  int pos, count;
+
+  if (set1 == NULL || set2 == NULL)
+    return 0;
+
+  count = set1->nelem;
+  if (count != set2->nelem)
+    return 0;
+
+  for (pos = 0; pos < count; ++pos)
+    if (set1->elems[pos] != set2->elems[pos])
+      return 0;
+  return 1;
+}
+
+/* Look ELEM up in SET by binary search.  Return its index plus one if
+   it is found, and 0 if it is not or if SET has no elements.  */
+
+static int
+re_node_set_contains (set, elem)
+     const re_node_set *set;
+     int elem;
+{
+  int lo, hi;
+
+  if (set->nelem <= 0)
+    return 0;
+
+  /* Narrow [LO, HI] down to the first element that is not below ELEM.  */
+  lo = 0;
+  hi = set->nelem - 1;
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+
+      if (set->elems[probe] < elem)
+        lo = probe + 1;
+      else
+        hi = probe;
+    }
+
+  if (set->elems[lo] != elem)
+    return 0;
+  return lo + 1;
+}
+
+/* Remove the element at position IDX from SET, closing the gap by moving
+   the following elements down one slot.  An IDX outside [0, nelem) is
+   ignored.  */
+static void
+re_node_set_remove_at (set, idx)
+     re_node_set *set;
+     int idx;
+{
+  int last;
+  if (idx < 0 || idx >= set->nelem)
+    return;
+
+  /* LAST is the new element count, i.e. one past the final valid slot.  */
+  last = --set->nelem;
+  while (idx < last)
+    {
+      set->elems[idx] = set->elems[idx + 1];
+      idx++;
+    }
+}
+
+
+/* Append the token TOKEN to dfa->nodes and return its index, or return -1
+   if memory could not be allocated.
+   When the node array is full its capacity is doubled; if MODE is nonzero
+   the per-node arrays (nexts, org_indices, edests, eclosures,
+   inveclosures) are grown to the same capacity.
+   On failure DFA stays consistent: every pointer stored in it still
+   refers to a live block and dfa->nodes_alloc keeps its old value.  */
+
+static int
+re_dfa_add_node (dfa, token, mode)
+     re_dfa_t *dfa;
+     re_token_t token;
+     int mode;
+{
+  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
+    {
+      int new_nodes_alloc = dfa->nodes_alloc * 2;
+      re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
+                                          new_nodes_alloc);
+      if (BE (new_array == NULL, 0))
+        return -1;
+      dfa->nodes = new_array;
+      if (mode)
+        {
+          int *new_nexts, *new_indices;
+          re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+          int failed = 0;
+
+          /* A successful realloc may move the block and release the old
+             one, so each new pointer is stored immediately.  Deferring
+             the stores until all five calls succeed would leave DFA
+             holding dangling pointers (and leak the new blocks) whenever
+             only some of the calls fail.  */
+          new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+          if (new_nexts != NULL)
+            dfa->nexts = new_nexts;
+          else
+            failed = 1;
+
+          new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+          if (new_indices != NULL)
+            dfa->org_indices = new_indices;
+          else
+            failed = 1;
+
+          new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+          if (new_edests != NULL)
+            dfa->edests = new_edests;
+          else
+            failed = 1;
+
+          new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+                                      new_nodes_alloc);
+          if (new_eclosures != NULL)
+            dfa->eclosures = new_eclosures;
+          else
+            failed = 1;
+
+          new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+                                         new_nodes_alloc);
+          if (new_inveclosures != NULL)
+            dfa->inveclosures = new_inveclosures;
+          else
+            failed = 1;
+
+          /* Arrays that did grow are merely larger than nodes_alloc
+             records, which is harmless.  */
+          if (BE (failed, 0))
+            return -1;
+        }
+      dfa->nodes_alloc = new_nodes_alloc;
+    }
+  /* Store the token, then reset the bookkeeping bits of the new node.  */
+  dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].opt_subexp = 0;
+  dfa->nodes[dfa->nodes_len].duplicated = 0;
+  dfa->nodes[dfa->nodes_len].constraint = 0;
+  return dfa->nodes_len++;
+}
+
+/* Compute the hash of a DFA state from its node set NODES and its
+   CONTEXT: the sum, in unsigned arithmetic, of CONTEXT, the element count
+   and every element.  */
+static unsigned int inline
+calc_state_hash (nodes, context)
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  unsigned int sum = context;
+  int pos = nodes->nelem;
+  sum += nodes->nelem;
+  /* Unsigned addition wraps and commutes, so the visiting order does not
+     affect the result.  */
+  while (--pos >= 0)
+    sum += nodes->elems[pos];
+  return sum;
+}
+
+/* Return the context-independent DFA state whose node set equals NODES,
+   creating and registering it if DFA does not have one yet.
+   - If NODES is empty, store REG_NOERROR in *ERR and return NULL: NULL
+     doubles as the "invalid state", so a NULL result is not necessarily
+     an error.
+   - If a new state cannot be created, store REG_ESPACE in *ERR and
+     return NULL.
+   - Otherwise return the state; *ERR is not written in that case.
+   A non-NULL value is never returned together with an error.  */
+
+static re_dfastate_t*
+re_acquire_state (err, dfa, nodes)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+{
+  unsigned int hash;
+  re_dfastate_t *fresh;
+  struct re_state_table_entry *bucket;
+  int slot;
+
+  if (BE (nodes->nelem == 0, 0))
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  /* Context-independent states are hashed with a context of 0.  */
+  hash = calc_state_hash (nodes, 0);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  /* Scan the bucket; the cheap hash comparison filters candidates before
+     the element-wise set comparison.  */
+  for (slot = 0; slot < bucket->num; slot++)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash == hash
+          && re_node_set_compare (&candidate->nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists yet: create one.  */
+  fresh = create_ci_newstate (dfa, nodes, hash);
+  if (BE (fresh == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return fresh;
+}
+
+/* Return the DFA state whose entrance node set equals NODES and whose
+   context equals CONTEXT, creating and registering it if DFA does not
+   have one yet.
+   - If NODES is empty, store REG_NOERROR in *ERR and return NULL: NULL
+     doubles as the "invalid state", so a NULL result is not necessarily
+     an error.
+   - If a new state cannot be created, store REG_ESPACE in *ERR and
+     return NULL.
+   - Otherwise return the state; *ERR is not written in that case.
+   A non-NULL value is never returned together with an error.  */
+
+static re_dfastate_t*
+re_acquire_state_context (err, dfa, nodes, context)
+     reg_errcode_t *err;
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context;
+{
+  unsigned int hash;
+  re_dfastate_t *fresh;
+  struct re_state_table_entry *bucket;
+  int slot;
+
+  if (nodes->nelem == 0)
+    {
+      *err = REG_NOERROR;
+      return NULL;
+    }
+
+  hash = calc_state_hash (nodes, context);
+  bucket = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  /* A candidate must agree on hash, then on context, then on the
+     entrance node set, tested in that order.  */
+  for (slot = 0; slot < bucket->num; slot++)
+    {
+      re_dfastate_t *candidate = bucket->array[slot];
+      if (candidate->hash != hash)
+        continue;
+      if (candidate->context != context)
+        continue;
+      if (re_node_set_compare (candidate->entrance_nodes, nodes))
+        return candidate;
+    }
+
+  /* No matching state exists in `dfa' yet: create one.  */
+  fresh = create_cd_newstate (dfa, nodes, context, hash);
+  if (BE (fresh == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  return fresh;
+}
+
+/* Allocate a zero-filled DFA state and fill in what every kind of state
+   shares: a private copy of NODES and the hash value HASH.
+   Return the new state, or NULL if memory could not be allocated.  */
+
+static re_dfastate_t *
+create_newstate_common (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  re_dfastate_t *state = (re_dfastate_t *) calloc (1, sizeof (re_dfastate_t));
+  if (BE (state == NULL, 0))
+    return NULL;
+
+  if (BE (re_node_set_init_copy (&state->nodes, nodes) != REG_NOERROR, 0))
+    {
+      /* The node set could not be copied; give the state back.  */
+      re_free (state);
+      return NULL;
+    }
+
+  state->trtable = NULL;
+  state->hash = hash;
+  return state;
+}
+
+/* Append NEWSTATE to the bucket of dfa->state_table selected by HASH,
+   enlarging the bucket first when it is full.
+   Return REG_NOERROR on success, or REG_ESPACE if the bucket could not be
+   enlarged (the bucket is then left as it was).  */
+
+static reg_errcode_t
+register_state (dfa, newstate, hash)
+     re_dfa_t *dfa;
+     re_dfastate_t *newstate;
+     unsigned int hash;
+{
+  struct re_state_table_entry *bucket
+    = dfa->state_table + (hash & dfa->state_hash_mask);
+
+  if (BE (bucket->num >= bucket->alloc, 0))
+    {
+      /* The "+ 2" lets a bucket that starts at zero capacity grow.  */
+      int grown = 2 * bucket->num + 2;
+      re_dfastate_t **resized = re_realloc (bucket->array, re_dfastate_t *,
+                                            grown);
+      if (BE (resized == NULL, 0))
+        return REG_ESPACE;
+      bucket->array = resized;
+      bucket->alloc = grown;
+    }
+
+  bucket->array[bucket->num] = newstate;
+  bucket->num++;
+  return REG_NOERROR;
+}
+
+/* Create a DFA state for NODES that does not depend on any context,
+   derive its flags from the node types, and register it under HASH.
+   Return the new state, or NULL if it could not be allocated or
+   registered.  */
+
+static re_dfastate_t *
+create_ci_newstate (dfa, nodes, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int hash;
+{
+  int pos;
+  re_dfastate_t *newstate = create_newstate_common (dfa, nodes, hash);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  /* With no context there is a single node set: the entrance nodes are
+     the state's own nodes.  */
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (pos = 0; pos < nodes->nelem; pos++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[pos];
+      re_token_type_t type = node->type;
+
+      /* An unconstrained CHARACTER node matches none of the tests below
+         and therefore sets no flag.  */
+      if (type == END_OF_RE)
+        /* A state containing the halt node is a halt state.  */
+        newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+               || type == OP_UTF8_PERIOD
+               || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+        newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (node->constraint || type == ANCHOR)
+        newstate->has_constraint = 1;
+    }
+
+  if (BE (register_state (dfa, newstate, hash) != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      return NULL;
+    }
+  return newstate;
+}
+
+/* Create a DFA state for NODES that depends on the context CONTEXT, and
+   register it under HASH.
+   If at least one node carries a constraint, the state keeps a separately
+   allocated copy of NODES in entrance_nodes, and every node whose
+   constraint is not satisfied by CONTEXT is removed from the state's own
+   node set.
+   Return the new state, or NULL if memory could not be allocated or the
+   state could not be registered; nothing is leaked on failure.  */
+
+static re_dfastate_t *
+create_cd_newstate (dfa, nodes, context, hash)
+     re_dfa_t *dfa;
+     const re_node_set *nodes;
+     unsigned int context, hash;
+{
+  /* NCTX_NODES counts the nodes already removed from newstate->nodes, so
+     that index I in NODES maps to index I - NCTX_NODES there.  */
+  int i, nctx_nodes = 0;
+  reg_errcode_t err;
+  re_dfastate_t *newstate;
+
+  newstate = create_newstate_common (dfa, nodes, hash);
+  if (BE (newstate == NULL, 0))
+    return NULL;
+  newstate->context = context;
+  newstate->entrance_nodes = &newstate->nodes;
+
+  for (i = 0 ; i < nodes->nelem ; i++)
+    {
+      re_token_t *node = dfa->nodes + nodes->elems[i];
+      re_token_type_t type = node->type;
+      unsigned int constraint = node->constraint;
+
+      if (type == CHARACTER && !constraint)
+        continue;
+      /* If the state has the halt node, the state is a halt state.  */
+      else if (type == END_OF_RE)
+        newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+               || type == OP_UTF8_PERIOD
+               || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+        newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
+      else if (type == OP_BACK_REF)
+        newstate->has_backref = 1;
+      else if (type == ANCHOR)
+        constraint = node->opr.ctx_type;
+
+      if (constraint)
+        {
+          if (newstate->entrance_nodes == &newstate->nodes)
+            {
+              /* First constrained node: give the state its own copy of
+                 the full node set.  newstate->entrance_nodes is updated
+                 only once the copy is complete, because free_state
+                 dereferences it whenever it differs from
+                 &newstate->nodes; a NULL or half-built set must never be
+                 visible there.  */
+              re_node_set *entrance = re_malloc (re_node_set, 1);
+              if (BE (entrance == NULL, 0))
+                {
+                  free_state (newstate);
+                  return NULL;
+                }
+              if (BE (re_node_set_init_copy (entrance, nodes)
+                      != REG_NOERROR, 0))
+                {
+                  re_free (entrance);
+                  free_state (newstate);
+                  return NULL;
+                }
+              newstate->entrance_nodes = entrance;
+              newstate->has_constraint = 1;
+            }
+
+          if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
+            {
+              re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
+              ++nctx_nodes;
+            }
+        }
+    }
+  err = register_state (dfa, newstate, hash);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      free_state (newstate);
+      newstate = NULL;
+    }
+  return newstate;
+}
+
+/* Release STATE together with everything it owns: its node set, its two
+   transition tables and, when it is not simply an alias of the state's
+   own node set, the separately allocated entrance node set.  */
+static void
+free_state (state)
+     re_dfastate_t *state;
+{
+  re_node_set *entrance = state->entrance_nodes;
+
+  /* entrance_nodes either points at state->nodes or at a block of its
+     own; only the latter needs freeing here.  */
+  if (entrance != &state->nodes)
+    {
+      re_node_set_free (entrance);
+      re_free (entrance);
+    }
+  re_free (state->word_trtable);
+  re_free (state->trtable);
+  re_node_set_free (&state->nodes);
+  re_free (state);
+}
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
new file mode 100644
index 0000000..84d02cc
--- /dev/null
+++ b/lib/regex_internal.h
@@ -0,0 +1,807 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#ifndef _REGEX_INTERNAL_H
+#define _REGEX_INTERNAL_H 1
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined HAVE_LOCALE_H || defined _LIBC
+# include <locale.h>
+#endif
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
+
+/* In case that the system doesn't have isblank(). */
+#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
+# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#endif
+
+#ifdef _LIBC
+# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+# define _RE_DEFINE_LOCALE_FUNCTIONS 1
+# include <locale/localeinfo.h>
+# include <locale/elem-hash.h>
+# include <locale/coll-lookup.h>
+# endif
+#endif
+
+/* This is for other GNU distributions with internationalized messages. */
+#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
+# include <libintl.h>
+# ifdef _LIBC
+# undef gettext
+# define gettext(msgid) \
+ INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
+# endif
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+# define gettext_noop(String) String
+#endif
+
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
+# define RE_ENABLE_I18N
+#endif
+
+#if __GNUC__ >= 3
+# define BE(expr, val) __builtin_expect (expr, val)
+#else
+# define BE(expr, val) (expr)
+# define inline
+#endif
+
+/* Number of bits in a byte. */
+#define BYTE_BITS 8
+/* Number of single-byte characters. */
+#define SBC_MAX 256
+
+#define COLL_ELEM_LEN_MAX 8
+
+/* The character which represents newline. */
+#define NEWLINE_CHAR '\n'
+#define WIDE_NEWLINE_CHAR L'\n'
+
+/* Map glibc-internal names to the standard API when building outside glibc. */
+#ifndef _LIBC
+# define __wctype wctype
+# define __iswctype iswctype
+# define __btowc btowc
+/* This one is defined by string2.h */
+# ifndef __mempcpy
+# define __mempcpy mempcpy
+# endif
+# define __wcrtomb wcrtomb
+# define attribute_hidden
+#endif /* not _LIBC */
+
+#ifdef __GNUC__
+# define __attribute(arg) __attribute__ (arg)
+#else
+# define __attribute(arg)
+#endif
+
+extern const char __re_error_msgid[] attribute_hidden;
+extern const size_t __re_error_msgid_idx[] attribute_hidden;
+
+/* Number of bits in an unsigned int. */
+#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
+/* Number of unsigned ints in a bitset. */
+#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
+typedef unsigned int bitset[BITSET_UINTS];
+typedef unsigned int *re_bitset_ptr_t;
+typedef const unsigned int *re_const_bitset_ptr_t;
+
+/* Set, clear and test bit I of the bitset SET.  Both arguments are
+   parenthesized so that expressions such as `c + 1' expand correctly, and
+   the mask is built from an unsigned 1 because shifting a signed 1 into
+   the top bit of an int is undefined.  I is evaluated twice.  */
+#define bitset_set(set,i) ((set)[(i) / UINT_BITS] |= 1U << (i) % UINT_BITS)
+#define bitset_clear(set,i) ((set)[(i) / UINT_BITS] &= ~(1U << (i) % UINT_BITS))
+#define bitset_contain(set,i) ((set)[(i) / UINT_BITS] & (1U << (i) % UINT_BITS))
+#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_set_all(set) \
+ memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
+#define bitset_copy(dest,src) \
+ memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
+static inline void bitset_not (bitset set);
+static inline void bitset_merge (bitset dest, const bitset src);
+static inline void bitset_not_merge (bitset dest, const bitset src);
+static inline void bitset_mask (bitset dest, const bitset src);
+
+#define PREV_WORD_CONSTRAINT 0x0001
+#define PREV_NOTWORD_CONSTRAINT 0x0002
+#define NEXT_WORD_CONSTRAINT 0x0004
+#define NEXT_NOTWORD_CONSTRAINT 0x0008
+#define PREV_NEWLINE_CONSTRAINT 0x0010
+#define NEXT_NEWLINE_CONSTRAINT 0x0020
+#define PREV_BEGBUF_CONSTRAINT 0x0040
+#define NEXT_ENDBUF_CONSTRAINT 0x0080
+#define DUMMY_CONSTRAINT 0x0100
+
+typedef enum
+{
+ INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
+ WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
+ LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
+ LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
+ BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
+ BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
+ WORD_DELIM = DUMMY_CONSTRAINT
+} re_context_type;
+
+/* A set of node indices (indices into dfa->nodes), held in a growable
+   array.  re_node_set_insert keeps ELEMS in ascending order and
+   re_node_set_contains binary-searches it. */
+typedef struct
+{
+ /* Capacity of ELEMS, counted in elements. */
+ int alloc;
+ /* Number of elements currently stored in ELEMS. */
+ int nelem;
+ /* The elements themselves. */
+ int *elems;
+} re_node_set;
+
+typedef enum
+{
+ NON_TYPE = 0,
+
+ /* Node type, These are used by token, node, tree. */
+ CHARACTER = 1,
+ END_OF_RE = 2,
+ SIMPLE_BRACKET = 3,
+ OP_BACK_REF = 4,
+ OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET = 6,
+ OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+ /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
+ when the debugger shows values of this enum type. */
+#define EPSILON_BIT 8
+ OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+ OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+ OP_ALT = EPSILON_BIT | 2,
+ OP_DUP_ASTERISK = EPSILON_BIT | 3,
+ OP_DUP_PLUS = EPSILON_BIT | 4,
+ OP_DUP_QUESTION = EPSILON_BIT | 5,
+ ANCHOR = EPSILON_BIT | 6,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT = 16,
+
+ /* Token type, these are used only by token. */
+ OP_OPEN_BRACKET = 17,
+ OP_CLOSE_BRACKET,
+ OP_CHARSET_RANGE,
+ OP_OPEN_DUP_NUM,
+ OP_CLOSE_DUP_NUM,
+ OP_NON_MATCH_LIST,
+ OP_OPEN_COLL_ELEM,
+ OP_CLOSE_COLL_ELEM,
+ OP_OPEN_EQUIV_CLASS,
+ OP_CLOSE_EQUIV_CLASS,
+ OP_OPEN_CHAR_CLASS,
+ OP_CLOSE_CHAR_CLASS,
+ OP_WORD,
+ OP_NOTWORD,
+ OP_SPACE,
+ OP_NOTSPACE,
+ BACK_SLASH
+
+} re_token_type_t;
+
+#ifdef RE_ENABLE_I18N
+typedef struct
+{
+ /* Multibyte characters. */
+ wchar_t *mbchars;
+
+ /* Collating symbols. */
+# ifdef _LIBC
+ int32_t *coll_syms;
+# endif
+
+ /* Equivalence classes. */
+# ifdef _LIBC
+ int32_t *equiv_classes;
+# endif
+
+ /* Range expressions. */
+# ifdef _LIBC
+ uint32_t *range_starts;
+ uint32_t *range_ends;
+# else /* not _LIBC */
+ wchar_t *range_starts;
+ wchar_t *range_ends;
+# endif /* not _LIBC */
+
+ /* Character classes. */
+ wctype_t *char_classes;
+
+ /* If this character set is the non-matching list. */
+ unsigned int non_match : 1;
+
+ /* # of multibyte characters. */
+ int nmbchars;
+
+ /* # of collating symbols. */
+ int ncoll_syms;
+
+ /* # of equivalence classes. */
+ int nequiv_classes;
+
+ /* # of range expressions. */
+ int nranges;
+
+ /* # of character classes. */
+ int nchar_classes;
+} re_charset_t;
+#endif /* RE_ENABLE_I18N */
+
+typedef struct
+{
+ union
+ {
+ unsigned char c; /* for CHARACTER */
+ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
+#ifdef RE_ENABLE_I18N
+ re_charset_t *mbcset; /* for COMPLEX_BRACKET */
+#endif /* RE_ENABLE_I18N */
+ int idx; /* for BACK_REF */
+ re_context_type ctx_type; /* for ANCHOR */
+ } opr;
+#if __GNUC__ >= 2
+ re_token_type_t type : 8;
+#else
+ re_token_type_t type;
+#endif
+ unsigned int constraint : 10; /* context constraint */
+ unsigned int duplicated : 1;
+ unsigned int opt_subexp : 1;
+#ifdef RE_ENABLE_I18N
+ /* These 2 bits can be moved into the union if needed (e.g. if running out
+ of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
+ unsigned int mb_partial : 1;
+#endif
+ unsigned int word_char : 1;
+} re_token_t;
+
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+#define ACCEPT_MB_NODE(type) \
+ ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
+
+struct re_string_t
+{
+ /* Indicate the raw buffer which is the original string passed as an
+ argument of regexec(), re_search(), etc.. */
+ const unsigned char *raw_mbs;
+ /* Store the multibyte string. In case of "case insensitive mode" like
+ REG_ICASE, upper cases of the string are stored, otherwise MBS points
+ the same address that RAW_MBS points. */
+ unsigned char *mbs;
+#ifdef RE_ENABLE_I18N
+ /* Store the wide character string which is corresponding to MBS. */
+ wint_t *wcs;
+ int *offsets;
+ mbstate_t cur_state;
+#endif
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ int raw_mbs_idx;
+ /* The length of the valid characters in the buffers. */
+ int valid_len;
+ /* The corresponding number of bytes in raw_mbs array. */
+ int valid_raw_len;
+ /* The length of the buffers MBS and WCS. */
+ int bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ int cur_idx;
+ /* length of RAW_MBS array. */
+ int raw_len;
+ /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
+ int len;
+ /* End of the buffer may be shorter than its length in the cases such
+ as re_match_2, re_search_2. Then, we use STOP for end of the buffer
+ instead of LEN. */
+ int raw_stop;
+ /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
+ int stop;
+
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may be different from raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ unsigned RE_TRANSLATE_TYPE trans;
+ /* Copy of re_dfa_t's word_char. */
+ re_const_bitset_ptr_t word_char;
+ /* 1 if REG_ICASE. */
+ unsigned char icase;
+ unsigned char is_utf8;
+ unsigned char map_notascii;
+ unsigned char mbs_allocated;
+ unsigned char offsets_needed;
+ unsigned char newline_anchor;
+ unsigned char word_ops_used;
+ int mb_cur_max;
+};
+typedef struct re_string_t re_string_t;
+
+
+struct re_dfa_t;
+typedef struct re_dfa_t re_dfa_t;
+
+#ifndef _LIBC
+# ifdef __i386__
+# define internal_function __attribute ((regparm (3), stdcall))
+# else
+# define internal_function
+# endif
+#endif
+
+#ifndef RE_NO_INTERNAL_PROTOTYPES
+static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
+ int len, int init_len,
+ RE_TRANSLATE_TYPE trans, int icase,
+ const re_dfa_t *dfa)
+ internal_function;
+static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
+ int len, RE_TRANSLATE_TYPE trans,
+ int icase, const re_dfa_t *dfa)
+ internal_function;
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+ int eflags) internal_function;
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ int new_buf_len)
+ internal_function;
+# ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr) internal_function;
+static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
+# endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr) internal_function;
+static void re_string_translate_buffer (re_string_t *pstr) internal_function;
+static void re_string_destruct (re_string_t *pstr) internal_function;
+# ifdef RE_ENABLE_I18N
+static int re_string_elem_size_at (const re_string_t *pstr, int idx)
+ internal_function;
+static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
+ internal_function;
+static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
+ internal_function;
+# endif /* RE_ENABLE_I18N */
+static unsigned int re_string_context_at (const re_string_t *input, int idx,
+ int eflags) internal_function;
+static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
+ int idx) internal_function;
+static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
+ internal_function;
+#endif
+/* The byte OFFSET positions past the current index of PSTR, without
+   advancing.  OFFSET is parenthesized so that any expression can be
+   passed safely.  */
+#define re_string_peek_byte(pstr, offset) \
+  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
+#define re_string_fetch_byte(pstr) \
+ ((pstr)->mbs[(pstr)->cur_idx++])
+#define re_string_first_byte(pstr, idx) \
+ ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
+#define re_string_is_single_byte_char(pstr, idx) \
+ ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
+ || (pstr)->wcs[(idx) + 1] != WEOF))
+#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
+#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
+#define re_string_get_buffer(pstr) ((pstr)->mbs)
+#define re_string_length(pstr) ((pstr)->len)
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
+#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
+#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
+
+#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
+#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_free(p) free (p)
+
+struct bin_tree_t
+{
+ struct bin_tree_t *parent;
+ struct bin_tree_t *left;
+ struct bin_tree_t *right;
+
+ /* `node_idx' is the index in dfa->nodes, if `type' == 0.
+ Otherwise `type' indicate the type of this node. */
+ re_token_type_t type;
+ int node_idx;
+
+ int first;
+ int next;
+ re_node_set eclosure;
+};
+typedef struct bin_tree_t bin_tree_t;
+
+#define BIN_TREE_STORAGE_SIZE \
+ ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+ struct bin_tree_storage_t *next;
+ bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
+
+#define CONTEXT_WORD 1
+#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
+#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
+#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
+
+#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
+#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
+#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
+#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
+#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
+
+#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
+#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
+#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
+#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
+
+/* Nonzero if CONTEXT, the context preceding the current position, fails
+   one of the PREV_* requirements in CONSTRAINT.  Every use of CONSTRAINT
+   is parenthesized, as in NOT_SATISFY_NEXT_CONSTRAINT, so an argument
+   built with a low-precedence operator (e.g. `a | b') is masked as a
+   whole.  */
+#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
+ ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
+  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
+
+#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
+ ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
+ || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
+ || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
+
+/* One state of the DFA.  States are created by create_ci_newstate and
+   create_cd_newstate, stored in dfa->state_table, and released with
+   free_state. */
+struct re_dfastate_t
+{
+ /* Hash from calc_state_hash; it selects the state_table bucket and is
+ compared first when looking a state up. */
+ unsigned int hash;
+ /* The state's node set.  For a context-dependent state, nodes whose
+ constraint is not satisfied by CONTEXT have been removed. */
+ re_node_set nodes;
+ /* Points at NODES, or, for a context-dependent state containing a
+ constrained node, at a separately allocated copy of the node set the
+ state was created from. */
+ re_node_set *entrance_nodes;
+ /* Both tables are freed by free_state. */
+ struct re_dfastate_t **trtable;
+ struct re_dfastate_t **word_trtable;
+ /* Context given to create_cd_newstate; left zero (from calloc) for
+ context-independent states. */
+ unsigned int context : 4;
+ /* Set if the node set contains an END_OF_RE node. */
+ unsigned int halt : 1;
+ /* If this state can accept `multi byte'.
+ Note that we refer to multibyte characters, and multi character
+ collating elements as `multi byte'. */
+ unsigned int accept_mb : 1;
+ /* If this state has backreference node(s). */
+ unsigned int has_backref : 1;
+ /* Set if the node set contains an ANCHOR node or a node that carries a
+ constraint. */
+ unsigned int has_constraint : 1;
+};
+typedef struct re_dfastate_t re_dfastate_t;
+
+typedef struct
+{
+ /* start <= node < end */
+ int start;
+ int end;
+} re_subexp_t;
+
+/* One bucket of dfa->state_table: a growable array of the states whose
+   hash maps to this bucket (see register_state). */
+struct re_state_table_entry
+{
+ /* Number of states stored in ARRAY. */
+ int num;
+ /* Capacity of ARRAY, counted in pointers. */
+ int alloc;
+ re_dfastate_t **array;
+};
+
+/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
+
+typedef struct
+{
+ int next_idx;
+ int alloc;
+ re_dfastate_t **array;
+} state_array_t;
+
+/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
+
+typedef struct
+{
+ int node;
+ int str_idx; /* The position NODE match at. */
+ state_array_t path;
+} re_sub_match_last_t;
+
+/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
+ And information about the node, whose type is OP_CLOSE_SUBEXP,
+ corresponding to NODE is stored in LASTS. */
+
+typedef struct
+{
+ int str_idx;
+ int node;
+ int next_last_offset;
+ state_array_t *path;
+ int alasts; /* Allocation size of LASTS. */
+ int nlasts; /* The number of LASTS. */
+ re_sub_match_last_t **lasts;
+} re_sub_match_top_t;
+
+struct re_backref_cache_entry
+{
+ int node;
+ int str_idx;
+ int subexp_from;
+ int subexp_to;
+ int flag;
+};
+
+typedef struct
+{
+ /* The string object corresponding to the input string. */
+ re_string_t input;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_dfa_t *const dfa;
+#else
+ re_dfa_t *dfa;
+#endif
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ int match_last;
+ int last_node;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ int state_log_top;
+ /* Back reference cache. */
+ int nbkref_ents;
+ int abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_mb_elem_len;
+ int nsub_tops;
+ int asub_tops;
+ re_sub_match_top_t **sub_tops;
+} re_match_context_t;
+
+typedef struct
+{
+ int cur_bkref;
+ int cls_subexp_idx;
+
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **limited_states;
+
+ re_node_set limits;
+
+ int last_node;
+ int last_str_idx;
+ int check_subexp;
+} re_sift_context_t;
+
+struct re_fail_stack_ent_t
+{
+ int idx;
+ int node;
+ regmatch_t *regs;
+ re_node_set eps_via_nodes;
+};
+
+struct re_fail_stack_t
+{
+ int num;
+ int alloc;
+ struct re_fail_stack_ent_t *stack;
+};
+
+/* The compiled form of a regular expression. */
+struct re_dfa_t
+{
+ re_subexp_t *subexps;
+ /* Node (token) array: NODES_ALLOC is its capacity and NODES_LEN the
+ number of nodes in use; re_dfa_add_node appends to it. */
+ re_token_t *nodes;
+ int nodes_alloc;
+ int nodes_len;
+ /* Per-node arrays.  re_dfa_add_node grows them to the capacity of NODES
+ when it is called with a nonzero MODE. */
+ int *nexts;
+ int *org_indices;
+ re_node_set *edests;
+ re_node_set *eclosures;
+ re_node_set *inveclosures;
+ /* Hash table of DFA states; the bucket for a state is
+ state_table[hash & state_hash_mask]. */
+ struct re_state_table_entry *state_table;
+ re_dfastate_t *init_state;
+ re_dfastate_t *init_state_word;
+ re_dfastate_t *init_state_nl;
+ re_dfastate_t *init_state_begbuf;
+ bin_tree_t *str_tree;
+ bin_tree_storage_t *str_tree_storage;
+ re_bitset_ptr_t sb_char;
+ int str_tree_storage_idx;
+
+ /* number of subexpressions `re_nsub' is in regex_t. */
+ int subexps_alloc;
+ /* Mask applied to a state's hash to pick its STATE_TABLE bucket. */
+ unsigned int state_hash_mask;
+ int states_alloc;
+ int init_node;
+ int nbackref; /* The number of backreference in this dfa. */
+ /* Bitmap expressing which backreference is used. */
+ unsigned int used_bkref_map;
+ unsigned int has_plural_match : 1;
+ /* If this dfa has "multibyte node", which is a backreference or
+ a node which can accept multibyte character or multi character
+ collating element. */
+ unsigned int has_mb_node : 1;
+ unsigned int is_utf8 : 1;
+ unsigned int map_notascii : 1;
+ unsigned int word_ops_used : 1;
+ int mb_cur_max;
+ bitset word_char;
+ reg_syntax_t syntax;
+#ifdef DEBUG
+ char* re_str;
+#endif
+};
+
+#ifndef RE_NO_INTERNAL_PROTOTYPES
+static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function;
+static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function;
+static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
+ int elem2) internal_function;
+#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
+static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
+ const re_node_set *src) internal_function;
+static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
+ const re_node_set *src1,
+ const re_node_set *src2) internal_function;
+static reg_errcode_t re_node_set_init_union (re_node_set *dest,
+ const re_node_set *src1,
+ const re_node_set *src2) internal_function;
+static reg_errcode_t re_node_set_merge (re_node_set *dest,
+ const re_node_set *src) internal_function;
+static int re_node_set_insert (re_node_set *set, int elem) internal_function;
+static int re_node_set_compare (const re_node_set *set1,
+ const re_node_set *set2) internal_function;
+static int re_node_set_contains (const re_node_set *set, int elem) internal_function;
+static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
+#define re_node_set_empty(p) ((p)->nelem = 0)
+#define re_node_set_free(set) re_free ((set)->elems)
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
+static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
+ const re_node_set *nodes) internal_function;
+static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
+ re_dfa_t *dfa,
+ const re_node_set *nodes,
+ unsigned int context) internal_function;
+static void free_state (re_dfastate_t *state) internal_function;
+#endif
+
+
+typedef enum
+{
+ SB_CHAR,
+ MB_CHAR,
+ EQUIV_CLASS,
+ COLL_SYM,
+ CHAR_CLASS
+} bracket_elem_type;
+
+typedef struct
+{
+ bracket_elem_type type;
+ union
+ {
+ unsigned char ch;
+ unsigned char *name;
+ wchar_t wch;
+ } opr;
+} bracket_elem_t;
+
+
+/* Inline functions for bitset operation.  */
+
+/* Complement every bit of SET in place.  */
+static inline void
+bitset_not (set)
+     bitset set;
+{
+  unsigned int *word = set;
+  unsigned int *const end = set + BITSET_UINTS;
+  for (; word != end; ++word)
+    *word = ~*word;
+}
+
+/* OR the bits of SRC into DEST (DEST becomes the union of both).  */
+static inline void
+bitset_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  int word = BITSET_UINTS;
+  /* The words are independent, so the visiting order is irrelevant.  */
+  while (word-- > 0)
+    dest[word] |= src[word];
+}
+
+/* OR the complement of SRC into DEST.  */
+static inline void
+bitset_not_merge (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  int word = 0;
+  while (word < BITSET_UINTS)
+    {
+      dest[word] |= ~src[word];
+      ++word;
+    }
+}
+
+/* AND DEST with SRC (DEST becomes the intersection of both).  */
+static inline void
+bitset_mask (dest, src)
+     bitset dest;
+     const bitset src;
+{
+  unsigned int *out = dest;
+  const unsigned int *in = src;
+  unsigned int *const end = dest + BITSET_UINTS;
+  while (out != end)
+    *out++ &= *in++;
+}
+
+#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
+/* Inline functions for re_string. */
+/* Return the number of bytes occupied by the character of PSTR that
+   starts at index IDX: 1 when the string is single-byte (mb_cur_max is
+   1), otherwise 1 plus the number of directly following positions, below
+   valid_len, whose wcs entry is WEOF.  */
+static inline int
+re_string_char_size_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  int len = 1;
+  if (pstr->mb_cur_max == 1)
+    return 1;
+  /* Extend over the WEOF entries that follow the character's first
+     byte.  */
+  while (idx + len < pstr->valid_len && pstr->wcs[idx + len] == WEOF)
+    ++len;
+  return len;
+}
+
+/* Return the character of PSTR at index IDX as a wint_t: the byte from
+   mbs when the string is single-byte (mb_cur_max is 1), otherwise the
+   entry from wcs.  */
+static inline wint_t
+re_string_wchar_at (pstr, idx)
+     const re_string_t *pstr;
+     int idx;
+{
+  return (pstr->mb_cur_max == 1
+          ? (wint_t) pstr->mbs[idx]
+          : (wint_t) pstr->wcs[idx]);
+}
+
+/* Return the length in bytes of the collating element of PSTR that
+   starts at index IDX.  Outside glibc (no _LIBC), or when the locale has
+   no collation rules, this is always 1. */
+static int
+re_string_elem_size_at (pstr, idx)
+ const re_string_t *pstr;
+ int idx;
+{
+#ifdef _LIBC
+ const unsigned char *p, *extra;
+ const int32_t *table, *indirect;
+ int32_t tmp;
+ /* NOTE(review): <locale/weight.h> is included inside the function body;
+ presumably it defines findidx in terms of the locals TABLE, EXTRA and
+ INDIRECT declared above, so their names must not change -- confirm
+ against that header. */
+# include <locale/weight.h>
+ uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+
+ if (nrules != 0)
+ {
+ table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
+ _NL_COLLATE_INDIRECTMB);
+ p = pstr->mbs + idx;
+ /* findidx receives the address of P; the result stored in TMP is not
+ used, only the distance P has moved from its start. */
+ tmp = findidx (&p);
+ return p - pstr->mbs - idx;
+ }
+ else
+#endif /* _LIBC */
+ return 1;
+}
+#endif /* RE_ENABLE_I18N */
+
+#endif /* _REGEX_INTERNAL_H */
diff --git a/lib/regexec.c b/lib/regexec.c
new file mode 100644
index 0000000..66e0df7
--- /dev/null
+++ b/lib/regexec.c
@@ -0,0 +1,4269 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int str_idx, int from, int to)
+ internal_function;
+static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
+ internal_function;
+static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+ int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ int node, int str_idx)
+ internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx, int check_subexp)
+ internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, int stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, int length1,
+ const char *string2, int length2,
+ int start, int range, struct re_registers *regs,
+ int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, int length, int start,
+ int range, int stop, struct re_registers *regs,
+ int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ int nregs, int regs_allocated) internal_function;
+static inline re_dfastate_t *acquire_init_state_context
+ (reg_errcode_t *err, const re_match_context_t *mctx, int idx)
+ internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+ internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first)
+ internal_function;
+static int check_halt_node_context (const re_dfa_t *dfa, int node,
+ unsigned int context) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
+ internal_function;
+static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node,
+ int cur_idx, int nmatch) internal_function;
+static int proceed_next_node (const re_match_context_t *mctx,
+ int nregs, regmatch_t *regs,
+ int *pidx, int node, re_node_set *eps_via_nodes,
+ struct re_fail_stack_t *fs) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ int str_idx, int *dests, int nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes) internal_function;
+static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
+ regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx) internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
+ re_sift_context_t *sctx) internal_function;
+static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes) internal_function;
+static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates) internal_function;
+static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
+ re_node_set *dest_nodes,
+ const re_node_set *and_nodes) internal_function;
+static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
+ int dst_node, int dst_idx, int src_node,
+ int src_idx) internal_function;
+static int check_dst_limits_calc_pos (re_match_context_t *mctx,
+ int limit, re_node_set *eclosures,
+ int subexp_idx, int node, int str_idx) internal_function;
+static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, re_node_set *dest_nodes) internal_function;
+static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
+ int next_state_log_idx) internal_function;
+static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
+ re_dfastate_t **src, int num) internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+ re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *state)
+ internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *next_state) internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ int str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *pstate) internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+ re_dfastate_t *pstate) internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+ const re_node_set *nodes) internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+ int bkref_node, int bkref_str_idx) internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+ const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ int bkref_node, int bkref_str) internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+ state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str,
+ int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+ int str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes) internal_function;
+static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ int ex_subexp, int type) internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ int target, int ex_subexp,
+ int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+ re_node_set *cur_nodes, int cur_str,
+ int last_str, int subexp_num,
+ int type) internal_function;
+static int build_trtable (re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int idx) internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len) internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (re_dfa_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+ const re_token_t *node, int idx) internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
+
+/* Entry point for POSIX code. */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *__restrict preg;
+     const char *__restrict string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  reg_errcode_t err;
+  int start = 0;
+  int length;
+  size_t want_nmatch = nmatch;
+  regmatch_t *want_pmatch = pmatch;
+
+  /* With REG_STARTEND the caller passes the slice to search in pmatch[0];
+     otherwise the whole NUL-terminated string is searched.  */
+  if (eflags & REG_STARTEND)
+    {
+      start = pmatch[0].rm_so;
+      length = pmatch[0].rm_eo;
+    }
+  else
+    length = strlen (string);
+
+  /* A pattern compiled without sub-match support reports no offsets.  */
+  if (preg->no_sub)
+    {
+      want_nmatch = 0;
+      want_pmatch = NULL;
+    }
+
+  err = re_search_internal (preg, string, length, start, length - start,
+                            length, want_nmatch, want_pmatch, eflags);
+
+  /* Collapse every failure to a nonzero result; 0 means a match.  */
+  return err != REG_NOERROR;
+}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the latter two operate on the concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match extending
+ beyond the first STOP characters of the concatenation of the strings should
+ be considered.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups are stored in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+int
+re_match (bufp, string, length, start, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start;
+     struct re_registers *regs;
+{
+  /* Try position START only (range 0) over the whole string
+     (stop == LENGTH), and ask the stub for the match length.  */
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (bufp, string, length, start, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range;
+     struct re_registers *regs;
+{
+  /* Search the whole string (stop == LENGTH) and ask the stub for the
+     position of the match rather than its length.  */
+  const int ret_len = 0;
+
+  return re_search_stub (bufp, string, length, start, range, length, regs,
+                         ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, stop;
+     struct re_registers *regs;
+{
+  /* Two-string variant of re_match: range 0, and ask the stub for the
+     match length.  */
+  const int range = 0;
+  const int ret_len = 1;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop;
+     struct re_registers *regs;
+{
+  /* Two-string variant of re_search: ask the stub for the position of
+     the match rather than its length.  */
+  const int ret_len = 0;
+
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, ret_len);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+/* Back end of re_match_2 and re_search_2.  Search the concatenation of
+   STRING1 (LENGTH1 bytes) and STRING2 (LENGTH2 bytes) by handing a single
+   string to re_search_stub; START, RANGE, STOP, REGS and RET_LEN are passed
+   through unchanged.
+   Returns whatever re_search_stub returns, or -2 when a length or STOP is
+   negative, when LENGTH1 + LENGTH2 does not fit in an int, or when the
+   temporary concatenation buffer cannot be allocated.  */
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int length1, length2, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  /* Largest value of int, computed locally so this block does not depend
+     on which headers the including file pulled in.  */
+  const int int_max = (int) (~0U >> 1);
+  const char *str;
+  char *joined = NULL;
+  int rval;
+  int len;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Validate before adding: signed overflow is undefined, and a wrapped
+     LEN would be used as an allocation size and a string length.  */
+  if (BE (length1 > int_max - length2, 0))
+    return -2;
+  len = length1 + length2;
+
+  /* Only copy when both parts are non-empty; otherwise search the single
+     non-empty part in place.  */
+  if (length2 <= 0)
+    str = string1;
+  else if (length1 <= 0)
+    str = string2;
+  else
+    {
+      joined = re_malloc (char, len);
+      if (BE (joined == NULL, 0))
+        return -2;
+      memcpy (joined, string1, length1);
+      memcpy (joined + length1, string2, length2);
+      str = joined;
+    }
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  /* JOINED is NULL unless the concatenation buffer was allocated above.  */
+  re_free (joined);
+  return rval;
+}
+
+/* Common back end of re_match and re_search (and of their "_2" variants).
+   The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   STOP is passed to re_search_internal as the limit of the search.
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.
+   Returns -1 when START is out of range or no match is found, and -2 on
+   any other failure (allocation failure here, in re_copy_regs, or an error
+   code other than REG_NOMATCH from re_search_internal).  */
+
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int length, start, range, stop, ret_len;
+     struct re_registers *regs;
+{
+  reg_errcode_t result;
+  regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length, 0))
+    return -1;
+  /* Clamp RANGE so that START + RANGE stays within [0, LENGTH].  Since
+     0 <= START <= LENGTH here, LENGTH - START and -START cannot overflow,
+     whereas START + RANGE could.  */
+  if (BE (range > length - start, 0))
+    range = length - start;
+  else if (BE (range < -start, 0))
+    range = -start;
+
+  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
+
+  /* We need at least 1 register.  */
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      /* Fixed-size caller storage that is too small: fill only what fits.  */
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
+  pmatch = re_malloc (regmatch_t, nregs);
+  if (BE (pmatch == NULL, 0))
+    return -2;
+
+  result = re_search_internal (bufp, string, length, start, range, stop,
+                               nregs, pmatch, eflags);
+
+  rval = 0;
+
+  /* I hope we needn't fill the regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    /* Keep "no match" (-1) distinct from internal errors (-2), so that
+       e.g. memory exhaustion is not silently reported as a non-match.  */
+    rval = (result == REG_NOMATCH) ? -1 : -2;
+  else if (regs != NULL)
+    {
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
+    }
+
+  if (BE (rval == 0, 1))
+    {
+      if (ret_len)
+        {
+          assert (pmatch[0].rm_so == start);
+          rval = pmatch[0].rm_eo - start;
+        }
+      else
+        rval = pmatch[0].rm_so;
+    }
+  re_free (pmatch);
+  return rval;
+}
+
+/* Copy the NREGS match offsets in PMATCH into the GNU-style register
+   arrays of REGS, allocating or growing those arrays according to
+   REGS_ALLOCATED (REGS_UNALLOCATED, REGS_REALLOCATE or REGS_FIXED).
+   Entries of REGS beyond NREGS are set to -1.
+   Returns the new allocation state to store in the pattern buffer:
+   REGS_REALLOCATE or REGS_FIXED on success, REGS_UNALLOCATED when memory
+   could not be obtained (nothing is copied in that case).  */
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+     struct re_registers *regs;
+     regmatch_t *pmatch;
+     int nregs, regs_allocated;
+{
+  int rval = REGS_REALLOCATE;
+  int i;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+  int need_regs = nregs + 1;
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          /* Do not leak the array that was obtained.  */
+          re_free (regs->start);
+          regs->start = NULL;
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (BE (need_regs > regs->num_regs, 0))
+        {
+          regoff_t *new_start;
+          regoff_t *new_end;
+
+          new_start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (new_start == NULL, 0))
+            return REGS_UNALLOCATED;
+          /* A successful realloc may have released the old block, so
+             publish the new pointer right away; otherwise a failure
+             below would leave regs->start dangling.  */
+          regs->start = new_start;
+
+          new_end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (new_end == NULL, 0))
+            return REGS_UNALLOCATED;
+          regs->end = new_end;
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  /* Mark every remaining slot, including the extra one, as unused.  */
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs == 0)
+    {
+      /* No caller-supplied storage: mark the registers as unallocated
+         and clear the array pointers.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->end = (regoff_t *) 0;
+      regs->start = regs->end;
+      return;
+    }
+
+  /* Adopt the caller's arrays and record that they may be reallocated.  */
+  bufp->regs_allocated = REGS_REALLOCATE;
+  regs->num_regs = num_regs;
+  regs->start = starts;
+  regs->end = ends;
+}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC
+int
+# ifdef _LIBC
+weak_function
+# endif
+re_exec (s)
+     const char *s;
+{
+  /* Match S against the pattern held in re_comp_buf.  regexec returns 0
+     on a match, which this function reports as 1; anything else is 0.  */
+  int status = regexec (&re_comp_buf, s, 0, NULL, 0);
+
+  return status == 0;
+}
+#endif /* _REGEX_RE_COMP */
+
+static re_node_set empty_set;
+
+/* Internal entry point. */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+ length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
+ meanings as for regexec. START and RANGE have the same meanings
+ as for re_search.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
+ Note: We assume front end functions already check ranges.
+ (START + RANGE >= 0 && START + RANGE <= LENGTH) */
+
+/* Outline: validate the compiled DFA, set up the match context MCTX, then
+ walk candidate start positions from START toward START + RANGE (backwards
+ when RANGE is negative). At each candidate the fastmap, if usable, is
+ used to skip positions, the input buffer is re-based at the candidate and
+ check_matching is run. On success PMATCH is filled with offsets relative
+ to STRING. All exits after allocation go through free_return. */
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+ eflags)
+ const regex_t *preg;
+ const char *string;
+ int length, start, range, stop, eflags;
+ size_t nmatch;
+ regmatch_t pmatch[];
+{
+ reg_errcode_t err;
+ re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+ int left_lim, right_lim, incr;
+ int fl_longest_match, match_first, match_kind, match_last = -1;
+ int fast_translate, sb, ch;
+ /* MCTX is zero-initialized either by the designated initializer or by the
+ memset below, depending on what the compiler supports. */
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_match_context_t mctx = { .dfa = dfa };
+#else
+ re_match_context_t mctx;
+#endif
+ /* The fastmap is only used when it is accurate, RANGE is nonzero and the
+ pattern cannot match the empty string. */
+ char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+ && range && !preg->can_be_null) ? preg->fastmap : NULL;
+ unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+ memset (&mctx, '\0', sizeof (re_match_context_t));
+ mctx.dfa = dfa;
+#endif
+
+ /* Check if the DFA hasn't been compiled. */
+ if (BE (preg->used == 0 || dfa->init_state == NULL
+ || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return REG_NOMATCH;
+
+#ifdef DEBUG
+ /* We assume front-end functions already check them. */
+ assert (start + range >= 0 && start + range <= length);
+#endif
+
+ /* If initial states with non-begbuf contexts have no elements,
+ the regex must be anchored. If preg->newline_anchor is set,
+ we'll never use init_state_nl, so do not check it. */
+ if (dfa->init_state->nodes.nelem == 0
+ && dfa->init_state_word->nodes.nelem == 0
+ && (dfa->init_state_nl->nodes.nelem == 0
+ || !preg->newline_anchor))
+ {
+ /* Position 0 is the only possible start: give up unless it is one of
+ the two ends of the search range, then restrict the search to it. */
+ if (start != 0 && start + range != 0)
+ return REG_NOMATCH;
+ start = range = 0;
+ }
+
+ /* (Re)initialize the file-scope empty_set. */
+ re_node_set_init_empty (&empty_set);
+
+ /* We must check the longest matching, if nmatch > 0. */
+ fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+ err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ mctx.input.stop = stop;
+ mctx.input.raw_stop = stop;
+ mctx.input.newline_anchor = preg->newline_anchor;
+
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* We will log all the DFA states through which the dfa pass,
+ if nmatch > 1, or this dfa has "multibyte node", which is a
+ back-reference or a node which can accept multibyte character or
+ multi character collating element. */
+ if (nmatch > 1 || dfa->has_mb_node)
+ {
+ mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+ else
+ mctx.state_log = NULL;
+
+ match_first = start;
+ mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+ /* Check incrementally whether or not the input string matches. */
+ incr = (range < 0) ? -1 : 1;
+ left_lim = (range < 0) ? start + range : start;
+ right_lim = (range < 0) ? start : start + range;
+ sb = dfa->mb_cur_max == 1;
+ /* MATCH_KIND is a 4-bit selector for the switch below:
+ 8: a usable fastmap exists;
+ 4: single-byte locale, or neither RE_ICASE nor a translate table;
+ 2: forward search (RANGE >= 0);
+ 1: a translate table is present. */
+ match_kind =
+ (fastmap ? 8 : 0)
+ | (sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (range >= 0 ? 2 : 0)
+ | (t != NULL ? 1 : 0);
+
+ for (;; match_first += incr)
+ {
+ /* ERR stays REG_NOMATCH until a candidate position succeeds, so every
+ `goto free_return' in the scanning code reports "no match". */
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized to save code.
+ We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 0: case 1: case 2: case 3:
+ case 4: case 5: case 6: case 7:
+ /* No fastmap. */
+ break;
+
+ case 15:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 14:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ /* The loops above stop before testing RIGHT_LIM itself; test it here,
+ treating a position at or past LENGTH as the byte 0. */
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
+ }
+ break;
+
+ case 12:
+ case 13:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ do
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ if (mctx.input.raw_mbs_idx + mctx.input.valid_raw_len <= match_first
+ || match_first < mctx.input.raw_mbs_idx)
+ {
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = ((match_first >= length) ? 0
+ : re_string_byte_at (&mctx.input,
+ match_first - mctx.input.raw_mbs_idx));
+ if (fastmap[ch])
+ break;
+ match_first += incr;
+ }
+ while (match_first >= left_lim && match_first <= right_lim);
+ if (!fastmap[ch])
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
+ break;
+ }
+
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the beginning of the buffer. */
+ err = re_string_reconstruct (&mctx.input, match_first, eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* Don't consider this char as a possible match start if it is part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
+#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ range >= 0 ? &match_first : NULL);
+ /* check_matching returns -1 for no match and -2 for an error;
+ any other value is the end index of the match. */
+ if (match_last != -1)
+ {
+ if (BE (match_last == -2, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+ {
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
+ }
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ /* The match must be re-verified; a REG_NOMATCH here only rejects
+ this start position, so the outer loop moves on to the next. */
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ match_last = -1;
+ }
+ else
+ break; /* We found a match. */
+ }
+ }
+
+ match_ctx_clean (&mctx);
+ }
+
+#ifdef DEBUG
+ assert (match_last != -1);
+ assert (err == REG_NOERROR);
+#endif
+
+ /* Set pmatch[] if we need. */
+ if (nmatch > 0)
+ {
+ int reg_idx;
+
+ /* Initialize registers. */
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+ pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+ /* Set the points where matching start/end. */
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = mctx.match_last;
+
+ if (!preg->no_sub && nmatch > 1)
+ {
+ err = set_regs (preg, &mctx, nmatch, pmatch,
+ dfa->has_plural_match && dfa->nbackref > 0);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* At last, add the offset to the each registers, since we slided
+ the buffers so that we could assume that the matching starts
+ from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+#ifdef RE_ENABLE_I18N
+ /* When the input keeps an offsets[] table, translate buffer
+ indexes through it; an index equal to valid_len has no table
+ entry and is shifted by the raw/valid length difference. */
+ if (BE (mctx.input.offsets_needed != 0, 0))
+ {
+ if (pmatch[reg_idx].rm_so == mctx.input.valid_len)
+ pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len;
+ else
+ pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so];
+ if (pmatch[reg_idx].rm_eo == mctx.input.valid_len)
+ pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len;
+ else
+ pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo];
+ }
+#else
+ assert (mctx.input.offsets_needed == 0);
+#endif
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
+ }
+
+ /* Common exit: release the state log, the match context (only when the
+ pattern has back references) and the input string, then return ERR. */
+ free_return:
+ re_free (mctx.state_log);
+ if (dfa->nbackref)
+ match_ctx_free (&mctx);
+ re_string_destruct (&mctx.input);
+ return err;
+}
+
+/* Re-check the match recorded in MCTX (match_last / last_node) by sifting
+ the state log backward from the halt position. On success the state log
+ of MCTX is replaced by the sifted states and last_node / match_last are
+ updated. Returns REG_NOERROR on success, REG_NOMATCH when, with back
+ references, no earlier halt position is left to try, REG_ESPACE when an
+ allocation here fails, or the error reported by a helper. */
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+ re_match_context_t *mctx;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int halt_node, match_last;
+ reg_errcode_t ret;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
+ re_sift_context_t sctx;
+#ifdef DEBUG
+ assert (mctx->state_log != NULL);
+#endif
+ match_last = mctx->match_last;
+ halt_node = mctx->last_node;
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ if (dfa->nbackref)
+ {
+ lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ /* Retry loop: sift from the current halt position; if nothing survives
+ at index 0, fall back to the previous index whose logged state is a
+ halt state and try again. */
+ while (1)
+ {
+ memset (lim_states, '\0',
+ sizeof (re_dfastate_t *) * (match_last + 1));
+ match_ctx_clear_flag (mctx);
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last, 0);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] != NULL || lim_states[0] != NULL)
+ break;
+ do
+ {
+ --match_last;
+ if (match_last < 0)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
+ halt_node = check_halt_state_context (mctx,
+ mctx->state_log[match_last],
+ match_last);
+ }
+ ret = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ /* LIM_STATES is reset to NULL after freeing so that the common cleanup
+ below does not free it a second time. */
+ re_free (lim_states);
+ lim_states = NULL;
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ /* No back references: a single backward sift, with LIM_STATES NULL. */
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last, 0);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* Success: MCTX takes ownership of SIFTED_STATES; clear the local pointer
+ so the cleanup below leaves the new state log alone. */
+ re_free (mctx->state_log);
+ mctx->state_log = sifted_states;
+ sifted_states = NULL;
+ mctx->last_node = halt_node;
+ mctx->match_last = match_last;
+ ret = REG_NOERROR;
+ free_return:
+ re_free (sifted_states);
+ re_free (lim_states);
+ return ret;
+}
+
+/* Pick the initial DFA state to use when matching starts at IDX.
+   If the plain initial state carries no constraint it is returned as is.
+   Otherwise the context found at IDX - 1 selects one of the precomputed
+   variants (word, newline, begin-of-buffer); the begin-of-buffer context
+   without newline has no precomputed variant and is built on demand by
+   re_acquire_state_context, which is handed ERR.  */
+
+static inline re_dfastate_t *
+acquire_init_state_context (err, mctx, idx)
+     reg_errcode_t *err;
+     const re_match_context_t *mctx;
+     int idx;
+{
+  re_dfa_t *const dfa = mctx->dfa;
+  unsigned int context;
+
+  if (!dfa->init_state->has_constraint)
+    return dfa->init_state;
+
+  context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+
+  if (IS_WORD_CONTEXT (context))
+    return dfa->init_state_word;
+  if (IS_ORDINARY_CONTEXT (context))
+    return dfa->init_state;
+  if (IS_NEWLINE_CONTEXT (context))
+    {
+      /* Newline context, with or without begin-of-buffer.  */
+      if (IS_BEGBUF_CONTEXT (context))
+        return dfa->init_state_begbuf;
+      return dfa->init_state_nl;
+    }
+  if (IS_BEGBUF_CONTEXT (context))
+    /* It is relatively rare case, then calculate on demand.  */
+    return re_acquire_state_context (err, dfa,
+                                     dfa->init_state->entrance_nodes,
+                                     context);
+
+  /* Must not happen?  */
+  return dfa->init_state;
+}
+
+/* Check whether the regular expression matches the input string INPUT or not,
+ and return the index where the match ends, return -1 if there is no match,
+ or return -2 in case of an error.
+ FL_LONGEST_MATCH means we want the POSIX longest matching.
+ If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
+ next place where we may want to try matching.
+ Note that the matcher assumes that the matching starts from the current
+ index of the buffer. */
+
+static int
+check_matching (mctx, fl_longest_match, p_match_first)
+ re_match_context_t *mctx;
+ int fl_longest_match;
+ int *p_match_first;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int match = 0;
+ int match_last = -1;
+ int cur_str_idx = re_string_cur_idx (&mctx->input);
+ re_dfastate_t *cur_state;
+ int at_init_state = p_match_first != NULL;
+ int next_start_idx = cur_str_idx;
+
+ err = REG_NOERROR;
+ cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
+ /* An initial state must not be NULL (invalid). */
+ if (BE (cur_state == NULL, 0))
+ {
+ assert (err == REG_ESPACE);
+ return -2;
+ }
+
+ if (mctx->state_log != NULL)
+ {
+ mctx->state_log[cur_str_idx] = cur_state;
+
+ /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
+ later. E.g. Processing back references. */
+ if (BE (dfa->nbackref, 0))
+ {
+ at_init_state = 0;
+ err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ if (cur_state->has_backref)
+ {
+ err = transit_state_bkref (mctx, &cur_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+
+ /* If the RE accepts NULL string. */
+ if (BE (cur_state->halt, 0))
+ {
+ if (!cur_state->has_constraint
+ || check_halt_state_context (mctx, cur_state, cur_str_idx))
+ {
+ if (!fl_longest_match)
+ return cur_str_idx;
+ else
+ {
+ match_last = cur_str_idx;
+ match = 1;
+ }
+ }
+ }
+
+ while (!re_string_eoi (&mctx->input))
+ {
+ re_dfastate_t *old_state = cur_state;
+ int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
+
+ if (BE (next_char_idx >= mctx->input.bufs_len, 0)
+ || (BE (next_char_idx >= mctx->input.valid_len, 0)
+ && mctx->input.valid_len < mctx->input.len))
+ {
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ assert (err == REG_ESPACE);
+ return -2;
+ }
+ }
+
+ cur_state = transit_state (&err, mctx, cur_state);
+ if (mctx->state_log != NULL)
+ cur_state = merge_state_with_log (&err, mctx, cur_state);
+
+ if (cur_state == NULL)
+ {
+ /* Reached the invalid state or an error. Try to recover a valid
+ state using the state log, if available and if we have not
+ already found a valid (even if not the longest) match. */
+ if (BE (err != REG_NOERROR, 0))
+ return -2;
+
+ if (mctx->state_log == NULL
+ || (match && !fl_longest_match)
+ || (cur_state = find_recover_state (&err, mctx)) == NULL)
+ break;
+ }
+
+ if (BE (at_init_state, 0))
+ {
+ if (old_state == cur_state)
+ next_start_idx = next_char_idx;
+ else
+ at_init_state = 0;
+ }
+
+ if (cur_state->halt)
+ {
+ /* Reached a halt state.
+ Check the halt state can satisfy the current context. */
+ if (!cur_state->has_constraint
+ || check_halt_state_context (mctx, cur_state,
+ re_string_cur_idx (&mctx->input)))
+ {
+ /* We found an appropriate halt state. */
+ match_last = re_string_cur_idx (&mctx->input);
+ match = 1;
+
+ /* We found a match, do not modify match_first below. */
+ p_match_first = NULL;
+ if (!fl_longest_match)
+ break;
+ }
+ }
+ }
+
+ if (p_match_first)
+ *p_match_first += next_start_idx;
+
+ return match_last;
+}
+
+/* Return 1 if NODE is an END_OF_RE node whose constraint (if any) is
+   satisfied by CONTEXT, otherwise return 0.  */
+
+static int check_halt_node_context (dfa, node, context)
+     const re_dfa_t *dfa;
+     int node;
+     unsigned int context;
+{
+  unsigned int node_constraint;
+
+  /* Only the node that ends the regular expression can halt.  */
+  if (dfa->nodes[node].type != END_OF_RE)
+    return 0;
+
+  node_constraint = dfa->nodes[node].constraint;
+  return !(node_constraint
+	   && NOT_SATISFY_NEXT_CONSTRAINT (node_constraint, context));
+}
+
+/* Search the halt state STATE for a node that halts in the context found
+   at input index IDX.
+   Return the first such node, or 0 if none of STATE's nodes qualifies.  */
+
+static int
+check_halt_state_context (mctx, state, idx)
+     const re_match_context_t *mctx;
+     const re_dfastate_t *state;
+     int idx;
+{
+  unsigned int context;
+  int pos = 0;
+#ifdef DEBUG
+  assert (state->halt);
+#endif
+  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
+  while (pos < state->nodes.nelem)
+    {
+      int candidate = state->nodes.elems[pos++];
+      if (check_halt_node_context (mctx->dfa, candidate, context))
+	return candidate;
+    }
+  return 0;
+}
+
+/* Compute the next node to which the "NFA" transits from NODE (the "NFA"
+ is the NFA corresponding to the DFA).
+ *PIDX is the current input index; it is advanced when NODE consumes input.
+ EPS_VIA_NODES records the nodes passed through without consuming input
+ and is emptied once input has been consumed.
+ If FS is not NULL and an epsilon node has two valid destinations, the
+ alternative is pushed on the fail stack FS, unless the first destination
+ has already been visited (in which case the second one is taken).
+ Return the destination node, -1 if there is no valid destination, or -2
+ if inserting into EPS_VIA_NODES or pushing on the fail stack fails. */
+
+static int
+proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
+ const re_match_context_t *mctx;
+ regmatch_t *regs;
+ int nregs, *pidx, node;
+ re_node_set *eps_via_nodes;
+ struct re_fail_stack_t *fs;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int i, err, dest_node;
+ dest_node = -1;
+ if (IS_EPSILON_NODE (dfa->nodes[node].type))
+ {
+ re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; /* Nodes logged at the current input index. */
+ int ndest, dest_nodes[2];
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (BE (err < 0, 0))
+ return -2;
+ /* Pick up valid destinations. */
+ for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
+ {
+ int candidate = dfa->edests[node].elems[i];
+ if (!re_node_set_contains (cur_nodes, candidate)) /* Only destinations present in the logged state are valid. */
+ continue;
+ dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
+ dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
+ ++ndest;
+ }
+ if (ndest <= 1) /* Zero or one valid destination: there is nothing to choose between. */
+ return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
+ /* In order to avoid infinite loop like "(a*)*". */
+ if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
+ return dest_nodes[1];
+ if (fs != NULL
+ && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs,
+ eps_via_nodes)) /* Any nonzero result from push_fail_stack is reported as -2. */
+ return -2;
+ return dest_nodes[0];
+ }
+ else
+ {
+ int naccepted = 0; /* Number of input bytes NODE accepts; 0 means "not decided yet". */
+ re_token_type_t type = dfa->nodes[node].type;
+
+#ifdef RE_ENABLE_I18N
+ if (ACCEPT_MB_NODE (type))
+ naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (type == OP_BACK_REF)
+ {
+ int subexp_idx = dfa->nodes[node].opr.idx;
+ naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; /* Length of the text currently recorded for the referenced register. */
+ if (fs != NULL)
+ {
+ if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) /* The referenced register is not completely set. */
+ return -1;
+ else if (naccepted)
+ {
+ char *buf = (char *) re_string_get_buffer (&mctx->input);
+ if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
+ naccepted) != 0) /* The input at *PIDX differs from the referenced text. */
+ return -1;
+ }
+ }
+
+ if (naccepted == 0) /* Empty back reference: handled like an epsilon transition. */
+ {
+ err = re_node_set_insert (eps_via_nodes, node);
+ if (BE (err < 0, 0))
+ return -2;
+ dest_node = dfa->edests[node].elems[0];
+ if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node))
+ return dest_node;
+ }
+ }
+
+ if (naccepted != 0
+ || check_node_accept (mctx, dfa->nodes + node, *pidx))
+ {
+ dest_node = dfa->nexts[node];
+ *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; /* Advance by one byte, or by the NACCEPTED bytes consumed. */
+ if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
+ || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
+ dest_node))) /* When backtracking, the destination must lie within the match and be in the logged state. */
+ return -1;
+ re_node_set_empty (eps_via_nodes);
+ return dest_node;
+ }
+ }
+ return -1;
+}
+
+/* Push a backtracking entry on the fail stack FS.
+   The entry records the input index STR_IDX, the alternative destination
+   node DESTS[1], a copy of the NREGS registers in REGS and a copy of
+   EPS_VIA_NODES.
+   Return REG_NOERROR on success.  On failure return REG_ESPACE (or the
+   error reported by re_node_set_init_copy) and leave the number of
+   entries unchanged: FS->num is only incremented once the new entry is
+   completely initialized, so code that later walks FS->stack[0..num-1]
+   never sees a half-built entry.  */
+
+static reg_errcode_t
+push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int str_idx, *dests, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  reg_errcode_t err;
+  int num = fs->num;
+  struct re_fail_stack_ent_t *ent;
+
+  /* Grow the stack when the new entry would fill it (same growth policy
+     as before: double the allocation one entry early).  */
+  if (num + 1 == fs->alloc)
+    {
+      struct re_fail_stack_ent_t *new_array;
+      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
+				       * fs->alloc * 2));
+      if (new_array == NULL)
+	return REG_ESPACE;
+      fs->alloc *= 2;
+      fs->stack = new_array;
+    }
+
+  ent = fs->stack + num;
+  ent->regs = re_malloc (regmatch_t, nregs);
+  if (ent->regs == NULL)
+    return REG_ESPACE;
+  memcpy (ent->regs, regs, sizeof (regmatch_t) * nregs);
+
+  err = re_node_set_init_copy (&ent->eps_via_nodes, eps_via_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      /* Do not leak the register copy of an entry that is never pushed.  */
+      re_free (ent->regs);
+      return err;
+    }
+
+  ent->idx = str_idx;
+  ent->node = dests[1];
+  /* The entry is complete: make it visible.  */
+  fs->num = num + 1;
+  return REG_NOERROR;
+}
+
+/* Pop the top entry of the fail stack FS.
+   Restore the input index into *PIDX and the NREGS registers into REGS,
+   release the register copy held by the entry, and replace the contents
+   of EPS_VIA_NODES (which is freed first) with the node set saved in the
+   entry.  Return the node saved in the entry.  */
+
+static int
+pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
+     struct re_fail_stack_t *fs;
+     int *pidx, nregs;
+     regmatch_t *regs;
+     re_node_set *eps_via_nodes;
+{
+  struct re_fail_stack_ent_t *top;
+  int top_idx;
+
+  fs->num -= 1;
+  top_idx = fs->num;
+  assert (top_idx >= 0);
+  top = fs->stack + top_idx;
+
+  *pidx = top->idx;
+  memcpy (regs, top->regs, sizeof (regmatch_t) * nregs);
+  re_node_set_free (eps_via_nodes);
+  re_free (top->regs);
+  /* Ownership of the saved node set moves to the caller.  */
+  *eps_via_nodes = top->eps_via_nodes;
+  return top->node;
+}
+
+/* Set the positions where the subexpressions start/end in the registers
+   PMATCH, by walking the NFA from the initial node along the states
+   recorded in MCTX->state_log.
+   Note: We assume that pmatch[0] is already set, and
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.
+   If FL_BACKTRACK is nonzero a fail stack is used so that alternative
+   epsilon destinations can be retried.
+   Return REG_NOERROR when the registers have been set, REG_ESPACE when an
+   allocation fails, and REG_NOMATCH when no path can be followed and no
+   alternative is left to try.
+   The fail stack is never popped while it is empty: if the final node is
+   reached with a subexpression still open and nothing is left to retry,
+   the registers are accepted as they are.  */
+
+static reg_errcode_t
+set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
+     const regex_t *preg;
+     const re_match_context_t *mctx;
+     size_t nmatch;
+     regmatch_t *pmatch;
+     int fl_backtrack;
+{
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int idx, cur_node, real_nmatch;
+  re_node_set eps_via_nodes;
+  struct re_fail_stack_t *fs;
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+
+#ifdef DEBUG
+  assert (nmatch > 1);
+  assert (mctx->state_log != NULL);
+#endif
+  if (fl_backtrack)
+    {
+      fs = &fs_body;
+      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+	return REG_ESPACE;
+    }
+  else
+    fs = NULL;
+
+  cur_node = dfa->init_node;
+  /* Never track more registers than the pattern has (plus register 0).  */
+  real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
+  re_node_set_init_empty (&eps_via_nodes);
+
+  /* Snapshot of the registers, used by update_regs to undo empty matches
+     of optional subexpressions.  */
+  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
+
+  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
+    {
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
+
+      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
+	{
+	  int reg_idx;
+	  if (fs)
+	    {
+	      /* Look for a subexpression that was opened but not closed.  */
+	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
+		  break;
+	      if (reg_idx == nmatch || fs->num == 0)
+		{
+		  /* Either every register is consistent, or there is no
+		     alternative left to retry: accept what we have rather
+		     than popping an empty stack.  */
+		  re_node_set_free (&eps_via_nodes);
+		  return free_fail_stack_return (fs);
+		}
+	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+					 &eps_via_nodes);
+	    }
+	  else
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      return REG_NOERROR;
+	    }
+	}
+
+      /* Proceed to next node.  */
+      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
+				    &eps_via_nodes, fs);
+
+      if (BE (cur_node < 0, 0))
+	{
+	  if (BE (cur_node == -2, 0))
+	    {
+	      re_node_set_free (&eps_via_nodes);
+	      free_fail_stack_return (fs);
+	      return REG_ESPACE;
+	    }
+	  if (fs && fs->num > 0)
+	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
+				       &eps_via_nodes);
+	  else
+	    {
+	      /* Dead end with nothing left to backtrack to.  FS may be
+		 NULL here; free_fail_stack_return copes with that.  */
+	      re_node_set_free (&eps_via_nodes);
+	      free_fail_stack_return (fs);
+	      return REG_NOMATCH;
+	    }
+	}
+    }
+  re_node_set_free (&eps_via_nodes);
+  return free_fail_stack_return (fs);
+}
+
+/* Release every entry still on the fail stack FS and the stack array
+   itself.  FS may be NULL, in which case nothing is done.
+   Always return REG_NOERROR, so callers can write
+   `return free_fail_stack_return (fs);'.  */
+
+static reg_errcode_t
+free_fail_stack_return (fs)
+     struct re_fail_stack_t *fs;
+{
+  struct re_fail_stack_ent_t *ent;
+  int remaining;
+
+  if (fs == NULL)
+    return REG_NOERROR;
+
+  for (ent = fs->stack, remaining = fs->num; remaining > 0;
+       ++ent, --remaining)
+    {
+      re_node_set_free (&ent->eps_via_nodes);
+      re_free (ent->regs);
+    }
+  re_free (fs->stack);
+  return REG_NOERROR;
+}
+
+/* Update the registers PMATCH for the node CUR_NODE reached at input index
+   CUR_IDX.  Only OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP nodes whose register
+   number is below NMATCH have any effect.
+   PREV_IDX_MATCH holds a snapshot of the registers that is refreshed on
+   non-empty closes and restored when an optional subexpression is
+   closed on an empty match after it had already matched before.  */
+
+static void
+update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
+     re_dfa_t *dfa;
+     regmatch_t *pmatch, *prev_idx_match;
+     int cur_node, cur_idx, nmatch;
+{
+  int type = dfa->nodes[cur_node].type;
+  int reg_num;
+
+  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+    return;
+
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+
+  if (type == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+      return;
+    }
+
+  /* We are at the last node of this sub expression.  */
+  if (pmatch[reg_num].rm_so < cur_idx)
+    {
+      /* This is a non-empty match or we are not inside an optional
+	 subexpression.  Accept this right away.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+    }
+  else if (dfa->nodes[cur_node].opt_subexp
+	   && prev_idx_match[reg_num].rm_so != -1)
+    {
+      /* We transited through an empty match for an optional
+	 subexpression, like (a?)*, and this is not the subexp's first
+	 match.  Copy back the old content of the registers so that
+	 matches of an inner subexpression are undone as well, like in
+	 ((a?))*.  */
+      memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+    }
+  else
+    {
+      /* We completed a subexpression, but it may be part of an optional
+	 one, so do not update PREV_IDX_MATCH.  */
+      pmatch[reg_num].rm_eo = cur_idx;
+    }
+}
+
+/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
+ and sifts the nodes in each state according to the following rules.
+ The updated state_log will be written to STATE_LOG.
+
+ Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
+ 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
+ If `a' isn't the LAST_NODE and `a' can't epsilon transit to
+ the LAST_NODE, we throw away the node `a'.
+ 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
+ string `s' and transit to `b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
+ away the node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
+ thrown away, we throw away the node `a'.
+ 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
+ i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
+ node `a'.
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
+ we throw away the node `a'. */
+
+#define STATE_NODE_CONTAINS(state,node) \
+ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) /* Nonzero iff STATE is not NULL and its node set contains NODE. */
+
+static reg_errcode_t
+sift_states_backward (mctx, sctx)
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+{ /* Sift MCTX->state_log backward, from SCTX->last_str_idx down to index 0;
+ the sifted state for each index is stored in SCTX->sifted_states by
+ update_cur_sifted_state.
+ Return REG_NOERROR on success, REG_ESPACE if a node cannot be inserted
+ into the working set, or the error reported by a helper. */
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int null_cnt = 0; /* Number of consecutive NULL sifted states seen so far. */
+ int str_idx = sctx->last_str_idx;
+ re_node_set cur_dest; /* Working set: the sifted nodes being built for the current index. */
+ re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
+
+#ifdef DEBUG
+ assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
+#endif
+ cur_src = &mctx->state_log[str_idx]->nodes;
+
+ /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
+ transit to the last_node and the last_node itself. */
+ err = re_node_set_init_1 (&cur_dest, sctx->last_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* Then check each states in the state_log. */
+ while (str_idx > 0)
+ {
+ int i, ret;
+ /* Update counters. */
+ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
+ if (null_cnt > mctx->max_mb_elem_len) /* More empty states in a row than max_mb_elem_len: clear the sifted states below STR_IDX and stop. */
+ {
+ memset (sctx->sifted_states, '\0',
+ sizeof (re_dfastate_t *) * str_idx);
+ re_node_set_free (&cur_dest);
+ return REG_NOERROR;
+ }
+ re_node_set_empty (&cur_dest);
+ --str_idx;
+ cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
+
+ /* Then build the next sifted state.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_states[str_idx]' with `cur_dest'.
+ Note:
+ `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+ `cur_src' points the node_set of the old `state_log[str_idx]'. */
+ for (i = 0; i < cur_src->nelem; i++)
+ {
+ int prev_node = cur_src->elems[i];
+ int naccepted = 0; /* Bytes PREV_NODE accepts at STR_IDX; 0 means it is not kept. */
+ re_token_type_t type = dfa->nodes[prev_node].type;
+
+ if (IS_EPSILON_NODE (type)) /* Epsilon nodes are added later by update_cur_sifted_state. */
+ continue;
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (ACCEPT_MB_NODE (type))
+ naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
+
+#endif /* RE_ENABLE_I18N */
+ /* We don't check backreferences here.
+ See update_cur_sifted_state(). */
+
+ if (!naccepted
+ && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+ && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+ dfa->nexts[prev_node])) /* Single-byte case: the successor must have survived sifting at STR_IDX + 1. */
+ naccepted = 1;
+
+ if (naccepted == 0)
+ continue;
+
+ if (sctx->limits.nelem)
+ {
+ int to_idx = str_idx + naccepted;
+ if (check_dst_limits (mctx, &sctx->limits,
+ dfa->nexts[prev_node], to_idx,
+ prev_node, str_idx)) /* Nonzero means the transition violates a limit: drop PREV_NODE. */
+ continue;
+ }
+ ret = re_node_set_insert (&cur_dest, prev_node);
+ if (BE (ret == -1, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+
+ /* Add all the nodes which satisfy the following conditions:
+ - It can epsilon transit to a node in CUR_DEST.
+ - It is in CUR_SRC.
+ And update state_log. */
+ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ err = REG_NOERROR;
+ free_return:
+ re_node_set_free (&cur_dest);
+ return err;
+}
+
+/* Helper functions. */
+
+/* Prepare MCTX->state_log so that index NEXT_STATE_LOG_IDX can be used.
+   The buffers are extended first if that index lies beyond them (or
+   beyond the valid part while more input is available); then every log
+   entry between the old top and NEXT_STATE_LOG_IDX is cleared and the top
+   is moved up.  Return REG_NOERROR, or the error from extend_buffers.  */
+
+static reg_errcode_t
+clean_state_log_if_needed (mctx, next_state_log_idx)
+     re_match_context_t *mctx;
+     int next_state_log_idx;
+{
+  const int old_top = mctx->state_log_top;
+  int must_extend = (next_state_log_idx >= mctx->input.bufs_len);
+
+  if (!must_extend && next_state_log_idx >= mctx->input.valid_len)
+    must_extend = (mctx->input.valid_len < mctx->input.len);
+
+  if (must_extend)
+    {
+      reg_errcode_t err = extend_buffers (mctx);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+
+  if (old_top < next_state_log_idx)
+    {
+      memset (mctx->state_log + old_top + 1, '\0',
+	      sizeof (re_dfastate_t *) * (next_state_log_idx - old_top));
+      mctx->state_log_top = next_state_log_idx;
+    }
+  return REG_NOERROR;
+}
+
+/* Merge the first NUM states of SRC into DST, element by element.
+   An empty slot of DST simply takes the state from SRC; where both slots
+   are occupied, DST receives the state acquired for the union of the two
+   node sets.  Return REG_NOERROR, or the first error encountered.  */
+
+static reg_errcode_t
+merge_state_array (dfa, dst, src, num)
+     re_dfa_t *dfa;
+     re_dfastate_t **dst;
+     re_dfastate_t **src;
+     int num;
+{
+  int st_idx;
+  reg_errcode_t err;
+
+  for (st_idx = 0; st_idx < num; ++st_idx)
+    {
+      re_node_set merged_set;
+
+      /* Nothing to merge in from this slot.  */
+      if (src[st_idx] == NULL)
+	continue;
+
+      if (dst[st_idx] == NULL)
+	{
+	  dst[st_idx] = src[st_idx];
+	  continue;
+	}
+
+      err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
+				    &src[st_idx]->nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+      dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
+      re_node_set_free (&merged_set);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+
+/* Complete the sifted state for input index STR_IDX from DEST_NODES and
+   store it in SCTX->sifted_states[STR_IDX].
+   DEST_NODES is first extended with the nodes of the logged state that
+   can epsilon-transit into it, then filtered against SCTX->limits.  If
+   the logged state contains back references, sift_states_bkref is run
+   as well.  Return REG_NOERROR, or the first error encountered.  */
+
+static reg_errcode_t
+update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
+     re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+     int str_idx;
+     re_node_set *dest_nodes;
+{
+  re_dfa_t *const dfa = mctx->dfa;
+  re_dfastate_t *const logged = mctx->state_log[str_idx];
+  const re_node_set *candidates = &empty_set;
+  reg_errcode_t err;
+
+  if (logged != NULL)
+    candidates = &logged->nodes;
+
+  if (dest_nodes->nelem)
+    {
+      /* At first, add the nodes which can epsilon transit to a node in
+	 DEST_NODE.  */
+      err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+
+      /* Then, check the limitations in the current sift_context.  */
+      if (dest_nodes->nelem && sctx->limits.nelem)
+	{
+	  err = check_subexp_limits (dfa, dest_nodes, candidates,
+				     &sctx->limits, mctx->bkref_ents,
+				     str_idx);
+	  if (BE (err != REG_NOERROR, 0))
+	    return err;
+	}
+    }
+
+  /* Update state_log.  */
+  sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
+  if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
+    return err;
+
+  if (logged != NULL && logged->has_backref)
+    {
+      err = sift_states_bkref (mctx, sctx, str_idx, dest_nodes);
+      if (BE (err != REG_NOERROR, 0))
+	return err;
+    }
+  return REG_NOERROR;
+}
+
+/* For every node N that is in DEST_NODES on entry, add to DEST_NODES the
+   intersection of CANDIDATES with DFA->inveclosures[N].
+   Return REG_NOERROR, or the error from the node-set helpers.  */
+
+static reg_errcode_t
+add_epsilon_src_nodes (dfa, dest_nodes, candidates)
+     re_dfa_t *dfa;
+     re_node_set *dest_nodes;
+     const re_node_set *candidates;
+{
+  reg_errcode_t err;
+  int src_idx;
+  re_node_set src_copy;
+
+  /* Iterate over a snapshot, since DEST_NODES grows while we work.  */
+  err = re_node_set_init_copy (&src_copy, dest_nodes);
+  if (BE (err != REG_NOERROR, 0))
+    return err;
+
+  for (src_idx = 0; err == REG_NOERROR && src_idx < src_copy.nelem;
+       ++src_idx)
+    err = re_node_set_add_intersect (dest_nodes, candidates,
+				     dfa->inveclosures
+				     + src_copy.elems[src_idx]);
+
+  re_node_set_free (&src_copy);
+  return err;
+}
+
+static reg_errcode_t
+sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
+ re_dfa_t *dfa;
+ int node;
+ re_node_set *dest_nodes;
+ const re_node_set *candidates;
+{ /* Remove from DEST_NODES the nodes listed in DFA->inveclosures[NODE],
+ except those collected in EXCEPT_NODES by the first loop below.
+ Return REG_NOERROR, or the error from re_node_set_add_intersect. */
+ int ecl_idx;
+ reg_errcode_t err;
+ re_node_set *inv_eclosure = dfa->inveclosures + node;
+ re_node_set except_nodes; /* Nodes that must be kept in DEST_NODES. */
+ re_node_set_init_empty (&except_nodes);
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) /* Pass 1: collect the nodes to keep. */
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (cur_node == node)
+ continue;
+ if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
+ {
+ int edst1 = dfa->edests[cur_node].elems[0];
+ int edst2 = ((dfa->edests[cur_node].nelem > 1)
+ ? dfa->edests[cur_node].elems[1] : -1); /* -1 when there is no second epsilon destination. */
+ if ((!re_node_set_contains (inv_eclosure, edst1)
+ && re_node_set_contains (dest_nodes, edst1))
+ || (edst2 > 0
+ && !re_node_set_contains (inv_eclosure, edst2)
+ && re_node_set_contains (dest_nodes, edst2))) /* CUR_NODE has a destination outside INV_ECLOSURE that is in DEST_NODES. NOTE(review): `edst2 > 0' also skips a second destination equal to node 0 -- confirm that is intended. */
+ {
+ err = re_node_set_add_intersect (&except_nodes, candidates,
+ dfa->inveclosures + cur_node);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&except_nodes);
+ return err;
+ }
+ }
+ }
+ }
+ for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) /* Pass 2: remove everything that was not collected. */
+ {
+ int cur_node = inv_eclosure->elems[ecl_idx];
+ if (!re_node_set_contains (&except_nodes, cur_node))
+ {
+ int idx = re_node_set_contains (dest_nodes, cur_node) - 1; /* The result of re_node_set_contains minus one is used as the position to remove; presumably -1 when CUR_NODE is absent -- TODO confirm re_node_set_remove_at tolerates that. */
+ re_node_set_remove_at (dest_nodes, idx);
+ }
+ }
+ re_node_set_free (&except_nodes);
+ return REG_NOERROR;
+}
+
+/* Check the transition from (SRC_NODE, SRC_IDX) to (DST_NODE, DST_IDX)
+   against every back-reference limit in LIMITS.
+   For each limit the position of both end points relative to the
+   limiting subexpression is computed with check_dst_limits_calc_pos.
+   Return 1 as soon as the two positions differ for some limit, and 0 if
+   they agree for all of them.  */
+
+static int
+check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
+     re_match_context_t *mctx;
+     re_node_set *limits;
+     int dst_node, dst_idx, src_node, src_idx;
+{
+  re_dfa_t *const dfa = mctx->dfa;
+  int lim_idx;
+
+  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+    {
+      const int limit = limits->elems[lim_idx];
+      struct re_backref_cache_entry *ent = mctx->bkref_ents + limit;
+      int subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+      int dst_pos, src_pos;
+
+      dst_pos = check_dst_limits_calc_pos (mctx, limit,
+					   dfa->eclosures + dst_node,
+					   subexp_idx, dst_node, dst_idx);
+      src_pos = check_dst_limits_calc_pos (mctx, limit,
+					   dfa->eclosures + src_node,
+					   subexp_idx, src_node, src_idx);
+
+      /* Equal positions cover the cases
+	   <src> <dst> ( <subexp> )
+	   ( <subexp> ) <src> <dst>
+	   ( <subexp1> <src> <subexp2> <dst> <subexp3> )
+	 where this limitation is unrelated; anything else violates it.  */
+      if (src_pos != dst_pos)
+	return 1;
+    }
+  return 0;
+}
+
+static int
+check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
+ str_idx)
+ re_match_context_t *mctx;
+ re_node_set *eclosures;
+ int limit, subexp_idx, from_node, str_idx;
+{ /* Classify input index STR_IDX relative to the range
+ [subexp_from, subexp_to] of the back-reference cache entry
+ MCTX->bkref_ents[LIMIT]: return -1 before the range, 1 after it and 0
+ inside it. When STR_IDX is exactly on a boundary, the nodes in
+ ECLOSURES decide the result. FROM_NODE is only used to stop the
+ recursion through empty back references. */
+ re_dfa_t *const dfa = mctx->dfa;
+ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+ int node_idx;
+
+ /* If we are outside the range of the subexpression, return -1 or 1. */
+ if (str_idx < lim->subexp_from)
+ return -1;
+
+ if (lim->subexp_to < str_idx)
+ return 1;
+
+ /* If we are within the subexpression, return 0. */
+ if (str_idx != lim->subexp_from && str_idx != lim->subexp_to)
+ return 0;
+
+ /* Else, we are on the boundary: examine the nodes on the epsilon
+ closure. */
+ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
+ {
+ int node = eclosures->elems[node_idx];
+ switch (dfa->nodes[node].type)
+ {
+ case OP_BACK_REF:
+ {
+ int bi = search_cur_bkref_entry (mctx, str_idx); /* Index from which the cache entries for STR_IDX are scanned. */
+ for (; bi < mctx->nbkref_ents; ++bi)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
+ int dst, cpos;
+
+ /* If this backreference goes beyond the point we're
+ examining, don't go any further. */
+ if (ent->str_idx > str_idx)
+ break;
+
+ if (ent->node != node || ent->subexp_from != ent->subexp_to) /* Only empty back references of this very node are followed. */
+ continue;
+
+ /* Recurse trying to reach the OP_OPEN_SUBEXP and
+ OP_CLOSE_SUBEXP cases below. But, if the
+ destination node is the same node as the source
+ node, don't recurse because it would cause an
+ infinite loop: a regex that exhibits this behavior
+ is ()\1*\1* */
+ dst = dfa->edests[node].elems[0];
+ if (dst == from_node)
+ {
+ if (str_idx == lim->subexp_from)
+ return -1;
+ else /* if (str_idx == lim->subexp_to) */
+ return 0;
+ }
+
+ cpos = check_dst_limits_calc_pos (mctx, limit,
+ dfa->eclosures + dst,
+ subexp_idx, dst,
+ str_idx);
+
+ if (cpos == -1 && str_idx == lim->subexp_from)
+ return -1;
+
+ if (cpos == 0 /* && str_idx == lim->subexp_to */)
+ return 0;
+ }
+ break;
+ }
+
+ case OP_OPEN_SUBEXP:
+ if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx) /* The subexpression opens here, so we are still before it. */
+ return -1;
+ break;
+
+ case OP_CLOSE_SUBEXP:
+ if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx) /* The subexpression closes here, so we are still inside it. */
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (str_idx == lim->subexp_to) /* No deciding node found: at the end boundary we are past the subexpression, at the start boundary inside it. */
+ return 1;
+ else
+ return 0;
+}
+
+/* Check the limitations of sub expressions LIMITS, and remove the nodes
+ which are against limitations from DEST_NODES.
+ LIMITS holds indices into BKREF_ENTS; STR_IDX is the input index whose
+ sifted state DEST_NODES describes, and CANDIDATES is passed on to
+ sub_epsilon_src_nodes, which performs the removals.
+ Return REG_NOERROR, or the error from sub_epsilon_src_nodes. */
+
+static reg_errcode_t
+check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
+ re_dfa_t *dfa;
+ re_node_set *dest_nodes;
+ const re_node_set *candidates;
+ re_node_set *limits;
+ struct re_backref_cache_entry *bkref_ents;
+ int str_idx;
+{
+ reg_errcode_t err;
+ int node_idx, lim_idx;
+
+ for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
+ {
+ int subexp_idx;
+ struct re_backref_cache_entry *ent;
+ ent = bkref_ents + limits->elems[lim_idx];
+
+ if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
+ continue; /* This is unrelated limitation. */
+
+ subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+ if (ent->subexp_to == str_idx)
+ {
+ int ops_node = -1; /* OP_OPEN_SUBEXP node of SUBEXP_IDX found in DEST_NODES, or -1. */
+ int cls_node = -1; /* OP_CLOSE_SUBEXP node of SUBEXP_IDX found in DEST_NODES, or -1. */
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_OPEN_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ ops_node = node;
+ else if (type == OP_CLOSE_SUBEXP
+ && subexp_idx == dfa->nodes[node].opr.idx)
+ cls_node = node;
+ }
+
+ /* Check the limitation of the open subexpression. */
+ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
+ if (ops_node >= 0)
+ {
+ err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* Check the limitation of the close subexpression. */
+ if (cls_node >= 0)
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ if (!re_node_set_contains (dfa->inveclosures + node,
+ cls_node)
+ && !re_node_set_contains (dfa->eclosures + node,
+ cls_node)) /* NODE has CLS_NODE in neither its inveclosures nor its eclosures set. */
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ --node_idx; /* DEST_NODES was modified: look at this position again on the next iteration. */
+ }
+ }
+ }
+ else /* (ent->subexp_to != str_idx) */
+ {
+ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+ {
+ int node = dest_nodes->elems[node_idx];
+ re_token_type_t type = dfa->nodes[node].type;
+ if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
+ {
+ if (subexp_idx != dfa->nodes[node].opr.idx)
+ continue;
+ if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
+ || (type == OP_OPEN_SUBEXP)) /* In this branch ent->subexp_to != str_idx always holds, so both node types are removed. */
+ {
+ /* It is against this limitation.
+ Remove it from the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ }
+ }
+ }
+ return REG_NOERROR;
+}
+
+static reg_errcode_t
+sift_states_bkref (mctx, sctx, str_idx, dest_nodes)
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+ int str_idx;
+ re_node_set *dest_nodes;
+{ /* For each OP_BACK_REF node in the state logged at STR_IDX, walk its
+ back-reference cache entries; for every entry whose destination is
+ still present in SCTX->sifted_states, run sift_states_backward on a
+ local copy of SCTX that has the entry added to its limits. If
+ SCTX->limited_states is not NULL the results are merged into it.
+ Return REG_NOERROR, REG_ESPACE, or the error reported by a helper.
+ NOTE(review): DEST_NODES does not appear to be used in this body. */
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int node_idx, node;
+ re_sift_context_t local_sctx;
+ const re_node_set *candidates;
+ candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ : &mctx->state_log[str_idx]->nodes);
+ local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
+
+ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
+ {
+ int cur_bkref_idx = re_string_cur_idx (&mctx->input);
+ re_token_type_t type;
+ node = candidates->elems[node_idx];
+ type = dfa->nodes[node].type;
+ if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
+ continue;
+ /* Avoid infinite loop for the REs like "()\1+". */
+ if (node == sctx->last_node && str_idx == sctx->last_str_idx)
+ continue;
+ if (type == OP_BACK_REF)
+ {
+ int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+ for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+ {
+ int disabled_idx, subexp_len, to_idx, dst_node;
+ struct re_backref_cache_entry *entry;
+ entry = mctx->bkref_ents + enabled_idx;
+ if (entry->str_idx > str_idx)
+ break;
+ if (entry->node != node)
+ continue;
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]); /* A non-empty reference goes to nexts[node], an empty one to the first epsilon destination. */
+
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
+ dst_node)
+ || check_dst_limits (mctx, &sctx->limits, node,
+ str_idx, dst_node, to_idx)) /* Skip entries whose destination is out of range, was sifted away, or violates a limit. */
+ continue;
+ {
+ re_dfastate_t *cur_state;
+ entry->flag = 0;
+ for (disabled_idx = enabled_idx + 1;
+ disabled_idx < mctx->nbkref_ents; ++disabled_idx)
+ {
+ struct re_backref_cache_entry *entry2;
+ entry2 = mctx->bkref_ents + disabled_idx;
+ if (entry2->str_idx > str_idx)
+ break;
+ entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
+ }
+
+ if (local_sctx.sifted_states == NULL) /* First use: copy SCTX, with a private copy of the limits set. */
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits,
+ &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ err = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (err < 0, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ cur_state = local_sctx.sifted_states[str_idx]; /* Saved so it can be put back after the nested sift. */
+ err = sift_states_backward (mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+ /* We must not use the variable entry here, since
+ mctx->bkref_ents might be realloced. */
+ mctx->bkref_ents[enabled_idx].flag = 1;
+ }
+ }
+ enabled_idx = search_cur_bkref_entry (mctx, str_idx);
+ for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) /* Finally clear the flag of every entry of NODE at STR_IDX. */
+ {
+ struct re_backref_cache_entry *entry;
+ entry = mctx->bkref_ents + enabled_idx;
+ if (entry->str_idx > str_idx)
+ break;
+ if (entry->node == node)
+ entry->flag = 0;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ if (local_sctx.sifted_states != NULL) /* Non-NULL only once LOCAL_SCTX has been initialized above. */
+ {
+ re_node_set_free (&local_sctx.limits);
+ }
+
+ return err;
+}
+
+
+#ifdef RE_ENABLE_I18N
+/* Multibyte step of the backward sift.
+   Ask check_node_accept_bytes how many bytes node NODE_IDX accepts at
+   STR_IDX.  If that count is positive, the destination index does not
+   exceed MAX_STR_IDX, and the sifted state at the destination does not
+   contain the node's successor, return 0; in every other case return the
+   count unchanged.  */
+
+static int
+sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx)
+     const re_match_context_t *mctx;
+     re_sift_context_t *sctx;
+     int node_idx, str_idx, max_str_idx;
+{
+  re_dfa_t *const dfa = mctx->dfa;
+  int dest_idx;
+  /* Check the node can accept `multi byte'.  */
+  int nbytes = check_node_accept_bytes (dfa, node_idx, &mctx->input,
+					str_idx);
+
+  if (nbytes <= 0)
+    return nbytes;
+
+  dest_idx = str_idx + nbytes;
+  if (dest_idx > max_str_idx)
+    return nbytes;
+
+  /* The destination survived sifting: the node really accepts NBYTES
+     bytes of input.  */
+  if (STATE_NODE_CONTAINS (sctx->sifted_states[dest_idx],
+			   dfa->nexts[node_idx]))
+    return nbytes;
+
+  /* The destination was already thrown away, so the node could not
+     accept the current input `multi byte'.  */
+  return 0;
+}
+#endif /* RE_ENABLE_I18N */
+
+
+/* Functions for state transition. */
+
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte, and update STATE_LOG if necessary.
+ If STATE can accept a multibyte char/collating element/back reference
+ update the destination of STATE_LOG.
+ The input byte is fetched (consumed) from MCTX->input. The transition
+ table of STATE is built on first use; if that fails, *ERR is set to
+ REG_ESPACE and NULL is returned. NULL is also returned, with *ERR set
+ by transit_state_mb, when the multibyte step fails. */
+
+static re_dfastate_t *
+transit_state (err, mctx, state)
+ reg_errcode_t *err;
+ re_match_context_t *mctx;
+ re_dfastate_t *state;
+{
+ re_dfastate_t **trtable, *next_state; /* NOTE(review): next_state appears to be unused in this function. */
+ unsigned char ch;
+
+#ifdef RE_ENABLE_I18N
+ /* If the current state can accept multibyte. */
+ if (BE (state->accept_mb, 0))
+ {
+ *err = transit_state_mb (mctx, state);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+#endif /* RE_ENABLE_I18N */
+
+ /* Then decide the next state with the single byte. */
+ if (1)
+ {
+ /* Use transition table. Sorry for the goto, but we really need
+ to squeeze every single instruction here. */
+ ch = re_string_fetch_byte (&mctx->input);
+
+ retry:
+ trtable = state->trtable;
+ if (BE (trtable != NULL, 1)) /* Common case: a plain table indexed by the byte. */
+ return trtable[ch];
+
+ trtable = state->word_trtable;
+ if (BE (trtable != NULL, 1))
+ {
+ unsigned int context;
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ if (IS_WORD_CONTEXT (context)) /* In word context the entries start at offset SBC_MAX of the table. */
+ return trtable[ch + SBC_MAX];
+ else
+ return trtable[ch];
+ }
+
+ if (!build_trtable (mctx->dfa, state)) /* Neither table exists yet: build one, then look the byte up again. */
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ goto retry;
+ }
+#if 0
+ else
+ /* don't use transition table */
+ return transit_state_sb (err, mctx, state);
+#endif
+}
+
+/* Update the state_log if we need.
+
+ Record NEXT_STATE (which may be NULL) in MCTX->state_log at the current
+ input index. When the log already holds a state for that index, the
+ logged state and NEXT_STATE are merged into a state built from the
+ union of their entrance nodes. If the DFA has back references, the
+ resulting state is additionally scanned for OP_OPEN_SUBEXP nodes and
+ its back references are expanded.
+
+ Return the state now in effect for the current index, or NULL. On
+ failure NULL is returned and *ERR holds the error code. */
+re_dfastate_t *
+merge_state_with_log (err, mctx, next_state)
+ reg_errcode_t *err;
+ re_match_context_t *mctx;
+ re_dfastate_t *next_state;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int cur_idx = re_string_cur_idx (&mctx->input);
+ if (cur_idx > mctx->state_log_top)
+ {
+ /* First time this index is reached: just log the state. */
+ mctx->state_log[cur_idx] = next_state;
+ mctx->state_log_top = cur_idx;
+ }
+ else if (mctx->state_log[cur_idx] == 0)
+ {
+ mctx->state_log[cur_idx] = next_state;
+ }
+ else
+ {
+ re_dfastate_t *pstate;
+ unsigned int context;
+ re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
+ /* If (state_log[cur_idx] != 0), it implies that cur_idx is
+ the destination of a multibyte char/collating element/
+ back reference. Then the next state is the union set of
+ these destinations and the results of the transition table. */
+ pstate = mctx->state_log[cur_idx];
+ log_nodes = pstate->entrance_nodes;
+ if (next_state != NULL)
+ {
+ table_nodes = next_state->entrance_nodes;
+ *err = re_node_set_init_union (&next_nodes, table_nodes,
+ log_nodes);
+ /* The function returns a pointer, so a failure must yield an
+ explicit NULL rather than an indeterminate value. */
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ }
+ else
+ /* Shallow copy: NEXT_NODES borrows the storage of LOG_NODES and
+ therefore must not be freed below. */
+ next_nodes = *log_nodes;
+
+ /* Note: We already add the nodes of the initial state,
+ then we don't need to add them here. */
+
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input) - 1,
+ mctx->eflags);
+ next_state = mctx->state_log[cur_idx]
+ = re_acquire_state_context (err, dfa, &next_nodes, context);
+
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ /* Only the union built above owns its storage. */
+ if (table_nodes != NULL)
+ re_node_set_free (&next_nodes);
+ }
+
+ if (BE (dfa->nbackref, 0) && next_state != NULL)
+ {
+ /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+ later. We must check them here, since the back references in the
+ next state might use them. */
+ *err = check_subexp_matching_top (mctx, &next_state->nodes,
+ cur_idx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+
+ /* If the next state has back references. */
+ if (next_state->has_backref)
+ {
+ *err = transit_state_bkref (mctx, &next_state->nodes);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ /* transit_state_bkref may have replaced the logged state. */
+ next_state = mctx->state_log[cur_idx];
+ }
+ }
+
+ return next_state;
+}
+
+/* Skip bytes in the input that correspond to part of a
+ multi-byte match, then look in the log for a state
+ from which to restart matching.
+
+ The input is advanced one byte at a time until an index with a
+ non-NULL state_log entry is found; that entry is then passed through
+ merge_state_with_log. The search repeats while no state is obtained
+ and no error was reported.
+
+ Return the recovered state, or NULL when the index would pass
+ MCTX->state_log_top or when merge_state_with_log returns NULL with an
+ error stored in *ERR. */
+re_dfastate_t *
+find_recover_state (err, mctx)
+ reg_errcode_t *err;
+ re_match_context_t *mctx;
+{
+ re_dfastate_t *cur_state = NULL;
+ do
+ {
+ int max = mctx->state_log_top;
+ int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ do
+ {
+ if (++cur_str_idx > max)
+ return NULL;
+ re_string_skip_bytes (&mctx->input, 1);
+ }
+ while (mctx->state_log[cur_str_idx] == NULL);
+
+ cur_state = merge_state_with_log (err, mctx, NULL);
+ }
+ /* Test the error code stored through ERR, not the pointer itself;
+ comparing the pointer made this condition always false. CUR_STATE is
+ tested first so *ERR is only read when no state was produced. */
+ while (cur_state == NULL && *err == REG_NOERROR);
+ return cur_state;
+}
+
+/* Helper functions for transit_state. */
+
+/* From the node set CUR_NODES, pick up the nodes whose types are
+ OP_OPEN_SUBEXP and which have corresponding back references in the regular
+ expression. And register them to use them later for evaluating the
+ corresponding back references.
+
+ Each selected node is registered at STR_IDX with match_ctx_add_subtop.
+ Return REG_NOERROR on success, or the first error reported by
+ match_ctx_add_subtop. */
+
+static reg_errcode_t
+check_subexp_matching_top (mctx, cur_nodes, str_idx)
+ re_match_context_t *mctx;
+ re_node_set *cur_nodes;
+ int str_idx;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int node_idx;
+ reg_errcode_t err;
+
+ /* TODO: This isn't efficient.
+ Because there might be more than one nodes whose types are
+ OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+ nodes.
+ E.g. RE: (a){2} */
+ for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
+ {
+ int node = cur_nodes->elems[node_idx];
+ /* Only sub expressions whose index fits in the bitmap can be marked
+ as used. The mask is built from an unsigned 1 so that testing the
+ topmost bit does not shift into the sign bit of an int. */
+ if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+ && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
+ && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
+ {
+ err = match_ctx_add_subtop (mctx, node, str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+#if 0
+/* Return the next state to which the current state STATE will transit by
+ accepting the current input byte.
+
+ This function is compiled out by the enclosing `#if 0'. It computes
+ the destination without a transition table: for every node of STATE
+ that accepts the current byte, the epsilon closure of its follower is
+ merged into a node set, and the state for that set is acquired.
+ On failure NULL is returned and *ERR holds the error code. */
+
+static re_dfastate_t *
+transit_state_sb (err, mctx, state)
+ reg_errcode_t *err;
+ re_match_context_t *mctx;
+ re_dfastate_t *state;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ re_node_set next_nodes;
+ re_dfastate_t *next_state;
+ int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
+ unsigned int context;
+
+ *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ /* Collect the followers of every node that accepts the current byte. */
+ for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
+ {
+ int cur_node = state->nodes.elems[node_cnt];
+ if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
+ {
+ *err = re_node_set_merge (&next_nodes,
+ dfa->eclosures + dfa->nexts[cur_node]);
+ if (BE (*err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return NULL;
+ }
+ }
+ }
+ context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
+ next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
+ /* We don't need to check errors here, since the return value of
+ this function is next_state and ERR is already set. */
+
+ re_node_set_free (&next_nodes);
+ /* Consume the byte that was just examined. */
+ re_string_skip_bytes (&mctx->input, 1);
+ return next_state;
+}
+#endif
+
+#ifdef RE_ENABLE_I18N
+/* For every node of PSTATE that accepts a multi-byte sequence at the
+ current input position, add the epsilon closure of the node's follower
+ to the state logged at the index where that sequence ends.
+ MCTX->max_mb_elem_len is raised to the longest accepted length seen.
+ Return REG_NOERROR on success, otherwise the error reported by one of
+ the helpers. */
+static reg_errcode_t
+transit_state_mb (mctx, pstate)
+ re_match_context_t *mctx;
+ re_dfastate_t *pstate;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+
+ for (i = 0; i < pstate->nodes.nelem; ++i)
+ {
+ re_node_set dest_nodes, *new_nodes;
+ int cur_node_idx = pstate->nodes.elems[i];
+ int naccepted = 0, dest_idx;
+ unsigned int context;
+ re_dfastate_t *dest_state;
+
+ /* Skip nodes whose constraint is not satisfied in the current
+ context. */
+ if (dfa->nodes[cur_node_idx].constraint)
+ {
+ context = re_string_context_at (&mctx->input,
+ re_string_cur_idx (&mctx->input),
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
+ context))
+ continue;
+ }
+
+ /* How many bytes the node can accept? */
+ if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+ naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+ re_string_cur_idx (&mctx->input));
+ if (naccepted == 0)
+ continue;
+
+ /* The node can accepts `naccepted' bytes. */
+ dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
+ mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
+ : mctx->max_mb_elem_len);
+ err = clean_state_log_if_needed (mctx, dest_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+#ifdef DEBUG
+ assert (dfa->nexts[cur_node_idx] != -1);
+#endif
+ /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
+ then we use pstate->nodes.elems[i] instead. */
+ new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
+
+ dest_state = mctx->state_log[dest_idx];
+ if (dest_state == NULL)
+ /* Shallow copy: DEST_NODES borrows the storage of NEW_NODES, which
+ is why it is freed below only when DEST_STATE is not NULL. */
+ dest_nodes = *new_nodes;
+ else
+ {
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes, new_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
+ mctx->state_log[dest_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ if (dest_state != NULL)
+ re_node_set_free (&dest_nodes);
+ /* A NULL state is treated as a failure only when ERR was set. */
+ if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
+ return err;
+ }
+ return REG_NOERROR;
+}
+#endif /* RE_ENABLE_I18N */
+
+/* For every OP_BACK_REF node in NODES whose constraint (if any) is
+ satisfied at the current input index, collect the candidate matches
+ with get_subexp and add the appropriate epsilon closure to the state
+ logged at the index where each candidate ends. A back reference that
+ matched the empty string lands on the current index again, so the
+ function recurses on the newly added nodes in that case.
+ Return REG_NOERROR on success, otherwise the first error met. */
+static reg_errcode_t
+transit_state_bkref (mctx, nodes)
+ re_match_context_t *mctx;
+ const re_node_set *nodes;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int i;
+ int cur_str_idx = re_string_cur_idx (&mctx->input);
+
+ for (i = 0; i < nodes->nelem; ++i)
+ {
+ int dest_str_idx, prev_nelem, bkc_idx;
+ int node_idx = nodes->elems[i];
+ unsigned int context;
+ const re_token_t *node = dfa->nodes + node_idx;
+ re_node_set *new_dest_nodes;
+
+ /* Check whether `node' is a backreference or not. */
+ if (node->type != OP_BACK_REF)
+ continue;
+
+ if (node->constraint)
+ {
+ context = re_string_context_at (&mctx->input, cur_str_idx,
+ mctx->eflags);
+ if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+ continue;
+ }
+
+ /* `node' is a backreference.
+ Check the substring which the subexpression matched. */
+ /* Remember where the cache ended so that only the entries added by
+ get_subexp are scanned below. */
+ bkc_idx = mctx->nbkref_ents;
+ err = get_subexp (mctx, node_idx, cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* And add the epsilon closures (which is `new_dest_nodes') of
+ the backreference to appropriate state_log. */
+#ifdef DEBUG
+ assert (dfa->nexts[node_idx] != -1);
+#endif
+ for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
+ {
+ int subexp_len;
+ re_dfastate_t *dest_state;
+ struct re_backref_cache_entry *bkref_ent;
+ bkref_ent = mctx->bkref_ents + bkc_idx;
+ if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
+ continue;
+ subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
+ /* An empty match behaves like an epsilon transition, so the
+ destination is taken from edests instead of nexts. */
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
+ : dfa->eclosures + dfa->nexts[node_idx]);
+ dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
+ - bkref_ent->subexp_from);
+ context = re_string_context_at (&mctx->input, dest_str_idx - 1,
+ mctx->eflags);
+ dest_state = mctx->state_log[dest_str_idx];
+ /* Node count of the current state before the update; used below to
+ detect whether the update added anything to it. */
+ prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
+ : mctx->state_log[cur_str_idx]->nodes.nelem);
+ /* Add `new_dest_node' to state_log. */
+ if (dest_state == NULL)
+ {
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, new_dest_nodes,
+ context);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ re_node_set dest_nodes;
+ err = re_node_set_init_union (&dest_nodes,
+ dest_state->entrance_nodes,
+ new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&dest_nodes);
+ goto free_return;
+ }
+ mctx->state_log[dest_str_idx]
+ = re_acquire_state_context (&err, dfa, &dest_nodes, context);
+ re_node_set_free (&dest_nodes);
+ if (BE (mctx->state_log[dest_str_idx] == NULL
+ && err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ /* We need to check recursively if the backreference can epsilon
+ transit. */
+ if (subexp_len == 0
+ && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
+ {
+ err = check_subexp_matching_top (mctx, new_dest_nodes,
+ cur_str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ err = transit_state_bkref (mctx, new_dest_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ }
+ }
+ err = REG_NOERROR;
+ free_return:
+ return err;
+}
+
+/* Enumerate all the candidates which the backreference BKREF_NODE can match
+ at BKREF_STR_IDX, and register them by match_ctx_add_entry().
+ Note that we might collect inappropriate candidates here.
+ However, the cost of checking them strictly here is too high, then we
+ delay these checking for prune_impossible_nodes().
+
+ Return REG_NOERROR when the enumeration completed (including the case
+ where BKREF_NODE was already examined at BKREF_STR_IDX), or the error
+ code reported by a helper. REG_NOMATCH from a helper only means that
+ one candidate was rejected and is never returned to the caller. */
+
+static reg_errcode_t
+get_subexp (mctx, bkref_node, bkref_str_idx)
+ re_match_context_t *mctx;
+ int bkref_node, bkref_str_idx;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int subexp_num, sub_top_idx;
+ const char *buf = (const char *) re_string_get_buffer (&mctx->input);
+ /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
+ int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
+ for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
+ {
+ const struct re_backref_cache_entry *entry
+ = &mctx->bkref_ents[cache_idx];
+ if (entry->str_idx > bkref_str_idx)
+ break;
+ if (entry->node == bkref_node)
+ return REG_NOERROR; /* We already checked it. */
+ }
+ /* The back reference stores a 1-based number; the sub expression
+ nodes it is compared with below are 0-based. */
+ subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
+
+ /* For each sub expression */
+ for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
+ {
+ reg_errcode_t err;
+ re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
+ re_sub_match_last_t *sub_last;
+ int sub_last_idx, sl_str, bkref_str_off;
+
+ if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
+ continue; /* It isn't related. */
+
+ sl_str = sub_top->str_idx;
+ bkref_str_off = bkref_str_idx;
+ /* At first, check the last node of sub expressions we already
+ evaluated. */
+ for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
+ {
+ int sl_str_diff;
+ sub_last = sub_top->lasts[sub_last_idx];
+ sl_str_diff = sub_last->str_idx - sl_str;
+ /* The matched string by the sub expression match with the substring
+ at the back reference? */
+ if (sl_str_diff > 0)
+ {
+ if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
+ {
+ /* Not enough chars for a successful match. */
+ if (bkref_str_off + sl_str_diff > mctx->input.len)
+ break;
+
+ err = clean_state_log_if_needed (mctx,
+ bkref_str_off
+ + sl_str_diff);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+ }
+ if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
+ break; /* We don't need to search this sub expression any more. */
+ }
+ bkref_str_off += sl_str_diff;
+ sl_str += sl_str_diff;
+ err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+ bkref_str_idx);
+
+ /* Reload buf, since the preceding call might have reallocated
+ the buffer. */
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+
+ if (err == REG_NOMATCH)
+ continue;
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+
+ /* The loop above stopped early: the text at the back reference does
+ not match this sub expression any further. */
+ if (sub_last_idx < sub_top->nlasts)
+ continue;
+ if (sub_last_idx > 0)
+ ++sl_str;
+ /* Then, search for the other last nodes of the sub expression. */
+ for (; sl_str <= bkref_str_idx; ++sl_str)
+ {
+ int cls_node, sl_str_off;
+ const re_node_set *nodes;
+ sl_str_off = sl_str - sub_top->str_idx;
+ /* The matched string by the sub expression match with the substring
+ at the back reference? */
+ if (sl_str_off > 0)
+ {
+ if (BE (bkref_str_off >= mctx->input.valid_len, 0))
+ {
+ /* If we are at the end of the input, we cannot match. */
+ if (bkref_str_off >= mctx->input.len)
+ break;
+
+ err = extend_buffers (mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+ }
+ if (buf [bkref_str_off++] != buf[sl_str - 1])
+ break; /* We don't need to search this sub expression
+ any more. */
+ }
+ if (mctx->state_log[sl_str] == NULL)
+ continue;
+ /* Does this state have a ')' of the sub expression? */
+ nodes = &mctx->state_log[sl_str]->nodes;
+ cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
+ if (cls_node == -1)
+ continue; /* No. */
+ if (sub_top->path == NULL)
+ {
+ sub_top->path = calloc (sizeof (state_array_t),
+ sl_str - sub_top->str_idx + 1);
+ if (sub_top->path == NULL)
+ return REG_ESPACE;
+ }
+ /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
+ in the current context? */
+ err = check_arrival (mctx, sub_top->path, sub_top->node,
+ sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
+ if (err == REG_NOMATCH)
+ continue;
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
+ if (BE (sub_last == NULL, 0))
+ return REG_ESPACE;
+ err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
+ bkref_str_idx);
+
+ /* Reload buf here as well: the call may have reallocated the input
+ buffer, and the next iteration indexes BUF again. */
+ buf = (const char *) re_string_get_buffer (&mctx->input);
+
+ if (err == REG_NOMATCH)
+ continue;
+ /* Any other failure must reach the caller instead of being
+ silently dropped. */
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Helper functions for get_subexp(). */
+
+/* Decide whether the end of the sub expression described by SUB_LAST can
+ reach the back reference BKREF_NODE at BKREF_STR. When it can, record
+ the pair SUB_TOP/SUB_LAST as a back reference cache entry and make sure
+ the state log covers the index where the back reference would end.
+ Return the first non-REG_NOERROR code of the steps, or the result of
+ clean_state_log_if_needed. */
+
+static reg_errcode_t
+get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
+ re_match_context_t *mctx;
+ const re_sub_match_top_t *sub_top;
+ re_sub_match_last_t *sub_last;
+ int bkref_node, bkref_str;
+{
+ reg_errcode_t status;
+
+ /* Can the subexpression arrive the back reference? */
+ status = check_arrival (mctx, &sub_last->path, sub_last->node,
+ sub_last->str_idx, bkref_node, bkref_str,
+ OP_OPEN_SUBEXP);
+
+ /* Register the candidate only when the arrival check succeeded. */
+ if (status == REG_NOERROR)
+ status = match_ctx_add_entry (mctx, bkref_node, bkref_str,
+ sub_top->str_idx, sub_last->str_idx);
+ if (status != REG_NOERROR)
+ return status;
+
+ /* The back reference ends as many bytes after BKREF_STR as the sub
+ expression is long. */
+ return clean_state_log_if_needed (mctx,
+ bkref_str + sub_last->str_idx
+ - sub_top->str_idx);
+}
+
+/* Scan NODES in order and return the first node whose token type equals
+ TYPE and whose sub expression index equals SUBEXP_IDX.
+ Return -1 when NODES holds no such node.
+ TODO: This function isn't efficient...
+ Because there might be more than one nodes whose types are
+ OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
+ nodes.
+ E.g. RE: (a){2} */
+
+static int
+find_subexp_node (dfa, nodes, subexp_idx, type)
+ const re_dfa_t *dfa;
+ const re_node_set *nodes;
+ int subexp_idx, type;
+{
+ int pos = 0;
+ while (pos < nodes->nelem)
+ {
+ int candidate = nodes->elems[pos++];
+ /* The type is tested first; the index is only read for nodes of
+ the requested type. */
+ if (dfa->nodes[candidate].type != type)
+ continue;
+ if (dfa->nodes[candidate].opr.idx == subexp_idx)
+ return candidate;
+ }
+ return -1;
+}
+
+/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
+ LAST_NODE at LAST_STR. We record the path onto PATH since it will be
+ heavily reused.
+ Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.
+ Other error codes from the helpers are returned as they are.
+
+ While it runs, the function substitutes PATH->array for
+ MCTX->state_log and moves MCTX->input.cur_idx; both are put back
+ before the normal return.
+
+ NOTE(review): the early `return err' statements that follow the
+ substitution leave MCTX->state_log and MCTX->input.cur_idx modified;
+ confirm that callers do not rely on them after a failure. */
+
+static reg_errcode_t
+check_arrival (mctx, path, top_node, top_str, last_node, last_str,
+ type)
+ re_match_context_t *mctx;
+ state_array_t *path;
+ int top_node, top_str, last_node, last_str, type;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int subexp_num, backup_cur_idx, str_idx, null_cnt;
+ re_dfastate_t *cur_state = NULL;
+ re_node_set *cur_nodes, next_nodes;
+ re_dfastate_t **backup_state_log;
+ unsigned int context;
+
+ subexp_num = dfa->nodes[top_node].opr.idx;
+ /* Extend the buffer if we need. */
+ if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
+ {
+ re_dfastate_t **new_array;
+ int old_alloc = path->alloc;
+ path->alloc += last_str + mctx->max_mb_elem_len + 1;
+ new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
+ if (new_array == NULL)
+ {
+ /* Keep PATH consistent with the array that is still allocated. */
+ path->alloc = old_alloc;
+ return REG_ESPACE;
+ }
+ path->array = new_array;
+ /* Clear only the newly added tail. */
+ memset (new_array + old_alloc, '\0',
+ sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
+ }
+
+ /* Resume where a previous call on this PATH stopped, if any. */
+ str_idx = path->next_idx == 0 ? top_str : path->next_idx;
+
+ /* Temporary modify MCTX. */
+ backup_state_log = mctx->state_log;
+ backup_cur_idx = mctx->input.cur_idx;
+ mctx->state_log = path->array;
+ mctx->input.cur_idx = str_idx;
+
+ /* Setup initial node set. */
+ context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+ if (str_idx == top_str)
+ {
+ /* Fresh path: start from TOP_NODE and its epsilon closure. */
+ err = re_node_set_init_1 (&next_nodes, top_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ else
+ {
+ /* Resumed path: reuse the state recorded at STR_IDX. */
+ cur_state = mctx->state_log[str_idx];
+ if (cur_state && cur_state->has_backref)
+ {
+ err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
+ if (BE ( err != REG_NOERROR, 0))
+ return err;
+ }
+ else
+ re_node_set_init_empty (&next_nodes);
+ }
+ if (str_idx == top_str || (cur_state && cur_state->has_backref))
+ {
+ if (next_nodes.nelem)
+ {
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
+ subexp_num, type);
+ if (BE ( err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+ if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ mctx->state_log[str_idx] = cur_state;
+ }
+
+ /* Advance one index at a time until LAST_STR is reached, giving up once
+ more than max_mb_elem_len consecutive indices produced no state. */
+ for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
+ {
+ re_node_set_empty (&next_nodes);
+ /* Start from whatever was already logged for the next index. */
+ if (mctx->state_log[str_idx + 1])
+ {
+ err = re_node_set_merge (&next_nodes,
+ &mctx->state_log[str_idx + 1]->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ if (cur_state)
+ {
+ err = check_arrival_add_next_nodes (mctx, str_idx,
+ &cur_state->nodes, &next_nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ ++str_idx;
+ if (next_nodes.nelem)
+ {
+ err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
+ subexp_num, type);
+ if (BE ( err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ }
+ context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
+ cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
+ if (BE (cur_state == NULL && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&next_nodes);
+ return err;
+ }
+ mctx->state_log[str_idx] = cur_state;
+ /* Count consecutive empty positions; any state resets the count. */
+ null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
+ }
+ re_node_set_free (&next_nodes);
+ cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
+ : &mctx->state_log[last_str]->nodes);
+ /* Remember how far the path was computed for the next call. */
+ path->next_idx = str_idx;
+
+ /* Fix MCTX. */
+ mctx->state_log = backup_state_log;
+ mctx->input.cur_idx = backup_cur_idx;
+
+ /* Then check the current node set has the node LAST_NODE. */
+ if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
+ return REG_NOERROR;
+
+ return REG_NOMATCH;
+}
+
+/* Helper functions for check_arrival. */
+
+/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
+ to NEXT_NODES.
+ Epsilon nodes in CUR_NODES are skipped. For a node that accepts more
+ than one byte, its follower is also merged into the state logged at
+ the index where the multi-byte sequence ends.
+ Return REG_NOERROR on success, or an error code when a node set
+ operation or a state acquisition fails.
+ TODO: This function is similar to the functions transit_state*(),
+ however this function has many additional works.
+ Can't we unify them? */
+
+static reg_errcode_t
+check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
+ re_match_context_t *mctx;
+ int str_idx;
+ re_node_set *cur_nodes, *next_nodes;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ int cur_idx;
+ reg_errcode_t err;
+ /* Scratch set reused for every multi-byte destination. */
+ re_node_set union_set;
+ re_node_set_init_empty (&union_set);
+ for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
+ {
+ int naccepted = 0;
+ int cur_node = cur_nodes->elems[cur_idx];
+ re_token_type_t type = dfa->nodes[cur_node].type;
+ if (IS_EPSILON_NODE (type))
+ continue;
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (ACCEPT_MB_NODE (type))
+ {
+ naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
+ str_idx);
+ if (naccepted > 1)
+ {
+ re_dfastate_t *dest_state;
+ int next_node = dfa->nexts[cur_node];
+ int next_idx = str_idx + naccepted;
+ dest_state = mctx->state_log[next_idx];
+ re_node_set_empty (&union_set);
+ /* Keep the nodes already logged at the destination. */
+ if (dest_state)
+ {
+ err = re_node_set_merge (&union_set, &dest_state->nodes);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ /* A negative result from re_node_set_insert is treated as an
+ allocation failure. */
+ err = re_node_set_insert (&union_set, next_node);
+ if (BE (err < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
+ &union_set);
+ if (BE (mctx->state_log[next_idx] == NULL
+ && err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&union_set);
+ return err;
+ }
+ }
+ }
+#endif /* RE_ENABLE_I18N */
+ /* Any non-zero NACCEPTED, or a node accepting the input at STR_IDX,
+ adds the follower to the set for the next index. */
+ if (naccepted
+ || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
+ {
+ err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+ if (BE (err < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ return REG_ESPACE;
+ }
+ }
+ }
+ re_node_set_free (&union_set);
+ return REG_NOERROR;
+}
+
+/* Replace CUR_NODES by the epsilon closures of its nodes.
+ A closure that contains no node of type TYPE with sub expression
+ index EX_SUBEXP is merged as a whole. Otherwise the closure is
+ rebuilt node by node with check_arrival_expand_ecl_sub, which stops at
+ such a node.
+ On success the old contents of CUR_NODES are freed and REG_NOERROR is
+ returned. On failure CUR_NODES is left untouched and the error code
+ is returned.
+*/
+
+static reg_errcode_t
+check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type)
+ re_dfa_t *dfa;
+ re_node_set *cur_nodes;
+ int ex_subexp, type;
+{
+ reg_errcode_t err;
+ int idx, outside_node;
+ re_node_set new_nodes;
+#ifdef DEBUG
+ assert (cur_nodes->nelem);
+#endif
+ err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ /* Create a new node set NEW_NODES with the nodes which are epsilon
+ closures of the node in CUR_NODES. */
+
+ for (idx = 0; idx < cur_nodes->nelem; ++idx)
+ {
+ int cur_node = cur_nodes->elems[idx];
+ re_node_set *eclosure = dfa->eclosures + cur_node;
+ outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
+ if (outside_node == -1)
+ {
+ /* There are no problematic nodes, just merge them. */
+ err = re_node_set_merge (&new_nodes, eclosure);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ else
+ {
+ /* There are problematic nodes, re-calculate incrementally. */
+ err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
+ ex_subexp, type);
+ if (BE (err != REG_NOERROR, 0))
+ {
+ re_node_set_free (&new_nodes);
+ return err;
+ }
+ }
+ }
+ /* Hand the storage of NEW_NODES over to the caller's set. */
+ re_node_set_free (cur_nodes);
+ *cur_nodes = new_nodes;
+ return REG_NOERROR;
+}
+
+/* Helper function for check_arrival_expand_ecl.
+ Walk the epsilon transitions starting at TARGET and append the visited
+ nodes to DST_NODES. The walk stops at a node already in DST_NODES, at
+ a node without epsilon destinations, or at a node of type TYPE whose
+ sub expression index is EX_SUBEXP; such a node is itself appended only
+ when TYPE is OP_CLOSE_SUBEXP.
+ Return REG_NOERROR, or REG_ESPACE when an insertion fails. */
+
+static reg_errcode_t
+check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
+ re_dfa_t *dfa;
+ int target, ex_subexp, type;
+ re_node_set *dst_nodes;
+{
+ int node = target;
+ while (!re_node_set_contains (dst_nodes, node))
+ {
+ const re_node_set *edests;
+ int rc;
+ int is_boundary = (dfa->nodes[node].type == type
+ && dfa->nodes[node].opr.idx == ex_subexp);
+
+ /* Every visited node is recorded, except an excluded boundary node
+ that is not a closing one. */
+ if (!is_boundary || type == OP_CLOSE_SUBEXP)
+ {
+ rc = re_node_set_insert (dst_nodes, node);
+ if (BE (rc == -1, 0))
+ return REG_ESPACE;
+ }
+ if (is_boundary)
+ break;
+
+ edests = dfa->edests + node;
+ if (edests->nelem == 0)
+ break;
+ /* With two epsilon destinations, follow the second one recursively
+ and continue this loop along the first one. */
+ if (edests->nelem == 2)
+ {
+ rc = check_arrival_expand_ecl_sub (dfa, dst_nodes,
+ edests->elems[1],
+ ex_subexp, type);
+ if (BE (rc != REG_NOERROR, 0))
+ return rc;
+ }
+ node = edests->elems[0];
+ }
+ return REG_NOERROR;
+}
+
+
+/* For all the back references in the current state, calculate the
+ destination of the back references by the appropriate entry
+ in MCTX->BKREF_ENTS.
+
+ Only cache entries recorded at CUR_STR whose node is in CUR_NODES are
+ used. An entry for an empty match adds the expanded epsilon
+ destination to CUR_NODES itself; any other entry adds the follower of
+ the back reference to the state logged at the index where it ends.
+ Return REG_NOERROR on success, otherwise the first error met. */
+
+static reg_errcode_t
+expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
+ type)
+ re_match_context_t *mctx;
+ int cur_str, last_str, subexp_num, type;
+ re_node_set *cur_nodes;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ reg_errcode_t err;
+ int cache_idx, cache_idx_start;
+ /* The current state. */
+
+ cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
+ {
+ int to_idx, next_node;
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
+ if (ent->str_idx > cur_str)
+ break;
+ /* Is this entry ENT is appropriate? */
+ if (!re_node_set_contains (cur_nodes, ent->node))
+ continue; /* No. */
+
+ to_idx = cur_str + ent->subexp_to - ent->subexp_from;
+ /* Calculate the destination of the back reference, and append it
+ to MCTX->STATE_LOG. */
+ if (to_idx == cur_str)
+ {
+ /* The backreference did epsilon transit, we must re-check all the
+ node in the current state. */
+ re_node_set new_dests;
+ reg_errcode_t err2, err3;
+ next_node = dfa->edests[ent->node].elems[0];
+ if (re_node_set_contains (cur_nodes, next_node))
+ continue;
+ /* All three steps run unconditionally; their results are examined
+ together after NEW_DESTS has been released. */
+ err = re_node_set_init_1 (&new_dests, next_node);
+ err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
+ err3 = re_node_set_merge (cur_nodes, &new_dests);
+ re_node_set_free (&new_dests);
+ if (BE (err != REG_NOERROR || err2 != REG_NOERROR
+ || err3 != REG_NOERROR, 0))
+ {
+ err = (err != REG_NOERROR ? err
+ : (err2 != REG_NOERROR ? err2 : err3));
+ return err;
+ }
+ /* TODO: It is still inefficient... */
+ /* CUR_NODES grew, so restart the scan; the loop increment brings
+ the index back to CACHE_IDX_START. */
+ cache_idx = cache_idx_start - 1;
+ continue;
+ }
+ else
+ {
+ re_node_set union_set;
+ next_node = dfa->nexts[ent->node];
+ if (mctx->state_log[to_idx])
+ {
+ int ret;
+ /* Nothing to do when the destination already has the node. */
+ if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
+ next_node))
+ continue;
+ err = re_node_set_init_copy (&union_set,
+ &mctx->state_log[to_idx]->nodes);
+ ret = re_node_set_insert (&union_set, next_node);
+ if (BE (err != REG_NOERROR || ret < 0, 0))
+ {
+ re_node_set_free (&union_set);
+ err = err != REG_NOERROR ? err : REG_ESPACE;
+ return err;
+ }
+ }
+ else
+ {
+ err = re_node_set_init_1 (&union_set, next_node);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
+ re_node_set_free (&union_set);
+ if (BE (mctx->state_log[to_idx] == NULL
+ && err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+ return REG_NOERROR;
+}
+
+/* Build transition table for the state.
+ Return the new table if succeeded, otherwise return NULL. */
+
+static int
+build_trtable (dfa, state)
+ re_dfa_t *dfa;
+ re_dfastate_t *state;
+{
+ reg_errcode_t err;
+ int i, j, ch;
+ unsigned int elem, mask;
+ int dests_node_malloced = 0, dest_states_malloced = 0;
+ int ndests; /* Number of the destination states from `state'. */
+ int need_word_trtable = 0;
+ re_dfastate_t **trtable;
+ re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
+ re_node_set follows, *dests_node;
+ bitset *dests_ch;
+ bitset acceptable;
+
+ /* We build DFA states which corresponds to the destination nodes
+ from `state'. `dests_node[i]' represents the nodes which i-th
+ destination state contains, and `dests_ch[i]' represents the
+ characters which i-th destination state accepts. */
+#ifdef _LIBC
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
+ dests_node = (re_node_set *)
+ alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ else
+#endif
+ {
+ dests_node = (re_node_set *)
+ malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ if (BE (dests_node == NULL, 0))
+ return 0;
+ dests_node_malloced = 1;
+ }
+ dests_ch = (bitset *) (dests_node + SBC_MAX);
+
+ /* At first, group all nodes belonging to `state' into several
+ destinations. */
+ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
+ if (BE (ndests <= 0, 0))
+ {
+ if (dests_node_malloced)
+ free (dests_node);
+ if (ndests == 0)
+ state->trtable = (re_dfastate_t **)
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+
+ /* Return 0 in case of an error, 1 otherwise. */
+ return state->trtable != NULL;
+ }
+
+ err = re_node_set_alloc (&follows, ndests + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+
+#ifdef _LIBC
+ if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+ + ndests * 3 * sizeof (re_dfastate_t *)))
+ dest_states = (re_dfastate_t **)
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ else
+#endif
+ {
+ dest_states = (re_dfastate_t **)
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ if (BE (dest_states == NULL, 0))
+ {
+out_free:
+ if (dest_states_malloced)
+ free (dest_states);
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+ if (dests_node_malloced)
+ free (dests_node);
+ return 0;
+ }
+ dest_states_malloced = 1;
+ }
+ dest_states_word = dest_states + ndests;
+ dest_states_nl = dest_states_word + ndests;
+ bitset_empty (acceptable);
+
+ /* Then build the states for all destinations. */
+ for (i = 0; i < ndests; ++i)
+ {
+ int next_node;
+ re_node_set_empty (&follows);
+ /* Merge the follows of this destination states. */
+ for (j = 0; j < dests_node[i].nelem; ++j)
+ {
+ next_node = dfa->nexts[dests_node[i].elems[j]];
+ if (next_node != -1)
+ {
+ err = re_node_set_merge (&follows, dfa->eclosures + next_node);
+ if (BE (err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ }
+ dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
+ if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ /* If the new state has context constraint,
+ build appropriate states for these contexts. */
+ if (dest_states[i]->has_constraint)
+ {
+ dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_WORD);
+ if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+
+#ifdef RE_ENABLE_I18N
+ if (dest_states[i] != dest_states_word[i]
+ && dfa->mb_cur_max > 1)
+ need_word_trtable = 1;
+#endif
+
+ dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
+ CONTEXT_NEWLINE);
+ if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
+ goto out_free;
+ }
+ else
+ {
+ dest_states_word[i] = dest_states[i];
+ dest_states_nl[i] = dest_states[i];
+ }
+ bitset_merge (acceptable, dests_ch[i]);
+ }
+
+ if (!BE (need_word_trtable, 0))
+ {
+ /* We don't care about whether the following character is a word
+ character, or we are in a single-byte character set so we can
+ discern by looking at the character code: allocate a
+ 256-entry transition table. */
+ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_UINTS; ++i)
+ for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ if (dfa->word_char[i] & mask)
+ trtable[ch] = dest_states_word[j];
+ else
+ trtable[ch] = dest_states[j];
+ }
+ }
+#ifdef RE_ENABLE_I18N
+ else
+ {
+ /* We care about whether the following character is a word
+ character, and we are in a multi-byte character set: discern
+ by looking at the character code: build two 256-entry
+ transition tables, one starting at trtable[0] and one
+ starting at trtable[SBC_MAX]. */
+ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
+ 2 * SBC_MAX);
+ if (BE (trtable == NULL, 0))
+ goto out_free;
+
+ /* For all characters ch...: */
+ for (i = 0; i < BITSET_UINTS; ++i)
+ for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+ elem;
+ mask <<= 1, elem >>= 1, ++ch)
+ if (BE (elem & 1, 0))
+ {
+ /* There must be exactly one destination which accepts
+ character ch. See group_nodes_into_DFAstates. */
+ for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+ ;
+
+ /* j-th destination accepts the word character ch. */
+ trtable[ch] = dest_states[j];
+ trtable[ch + SBC_MAX] = dest_states_word[j];
+ }
+ }
+#endif
+
+ /* new line */
+ if (bitset_contain (acceptable, NEWLINE_CHAR))
+ {
+ /* The current state accepts newline character. */
+ for (j = 0; j < ndests; ++j)
+ if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
+ {
+ /* k-th destination accepts newline character. */
+ trtable[NEWLINE_CHAR] = dest_states_nl[j];
+ if (need_word_trtable)
+ trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
+ /* There must be only one destination which accepts
+ newline. See group_nodes_into_DFAstates. */
+ break;
+ }
+ }
+
+ if (dest_states_malloced)
+ free (dest_states);
+
+ re_node_set_free (&follows);
+ for (i = 0; i < ndests; ++i)
+ re_node_set_free (dests_node + i);
+
+ if (dests_node_malloced)
+ free (dests_node);
+
+ if (need_word_trtable)
+ state->word_trtable = trtable;
+ else
+ state->trtable = trtable;
+
+ return 1;
+}
+
+/* Group all nodes belonging to STATE into several destinations.
+ Then for all destinations, set the nodes belonging to the destination
+ to DESTS_NODE[i] and set the characters accepted by the destination
+ to DESTS_CH[i]. The character sets of distinct destinations are kept
+ disjoint by splitting an existing destination whenever a new node
+ accepts only part of it.
+ This function returns the number of destinations, or -1 if one of the
+ re_node_set_* calls fails (the DESTS_NODE entries counted so far are
+ freed in that case). */
+
+static int
+group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
+ re_dfa_t *dfa;
+ const re_dfastate_t *state;
+ re_node_set *dests_node;
+ bitset *dests_ch;
+{
+ reg_errcode_t err;
+ int i, j, k;
+ int ndests; /* Number of the destinations from `state'. */
+ bitset accepts; /* Characters a node can accept. */
+ const re_node_set *cur_nodes = &state->nodes;
+ /* `accepts' is cleared only once here: every path through the loop
+ body below leaves it empty again before the next node is examined. */
+ bitset_empty (accepts);
+ ndests = 0;
+
+ /* For all the nodes belonging to `state', */
+ for (i = 0; i < cur_nodes->nelem; ++i)
+ {
+ re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
+ re_token_type_t type = node->type;
+ unsigned int constraint = node->constraint;
+
+ /* Enumerate all single byte character this node can accept. */
+ if (type == CHARACTER)
+ bitset_set (accepts, node->opr.c);
+ else if (type == SIMPLE_BRACKET)
+ {
+ bitset_merge (accepts, node->opr.sbcset);
+ }
+ else if (type == OP_PERIOD)
+ {
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ bitset_merge (accepts, dfa->sb_char);
+ else
+#endif
+ bitset_set_all (accepts);
+ /* Honour the syntax bits that restrict what `.' may match. */
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#ifdef RE_ENABLE_I18N
+ else if (type == OP_UTF8_PERIOD)
+ {
+ /* Set the lower half of the bitset only. */
+ memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+ if (!(dfa->syntax & RE_DOT_NEWLINE))
+ bitset_clear (accepts, '\n');
+ if (dfa->syntax & RE_DOT_NOT_NULL)
+ bitset_clear (accepts, '\0');
+ }
+#endif
+ else
+ /* Any other node type contributes no single-byte transition. */
+ continue;
+
+ /* Check the `accepts' and sift out the characters which do not
+ match in the node's context. */
+ if (constraint)
+ {
+ if (constraint & NEXT_NEWLINE_CONSTRAINT)
+ {
+ /* Only the newline character, if accepted at all, survives. */
+ int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+ bitset_empty (accepts);
+ if (accepts_newline)
+ bitset_set (accepts, NEWLINE_CHAR);
+ else
+ continue;
+ }
+ if (constraint & NEXT_ENDBUF_CONSTRAINT)
+ {
+ /* No byte is accepted under this constraint. */
+ bitset_empty (accepts);
+ continue;
+ }
+
+ if (constraint & NEXT_WORD_CONSTRAINT)
+ {
+ unsigned int any_set = 0;
+ if (type == CHARACTER && !node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+ /* Mask `accepts' in place; any_set records whether a bit is left. */
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_UINTS; ++j)
+ any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_UINTS; ++j)
+ any_set |= (accepts[j] &= dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ if (constraint & NEXT_NOTWORD_CONSTRAINT)
+ {
+ unsigned int any_set = 0;
+ if (type == CHARACTER && node->word_char)
+ {
+ bitset_empty (accepts);
+ continue;
+ }
+ /* Same masking as above, with the complementary set. */
+#ifdef RE_ENABLE_I18N
+ if (dfa->mb_cur_max > 1)
+ for (j = 0; j < BITSET_UINTS; ++j)
+ any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
+ else
+#endif
+ for (j = 0; j < BITSET_UINTS; ++j)
+ any_set |= (accepts[j] &= ~dfa->word_char[j]);
+ if (!any_set)
+ continue;
+ }
+ }
+
+ /* Then divide `accepts' into DFA states, or create a new
+ state. Above, we make sure that accepts is not empty.
+ Note that ndests may grow inside this loop; a destination appended
+ by a split holds only characters outside `accepts', so it is
+ skipped by the empty-intersection test when j reaches it. */
+ for (j = 0; j < ndests; ++j)
+ {
+ bitset intersec; /* Intersection sets, see below. */
+ bitset remains;
+ /* Flags, see below. */
+ int has_intersec, not_subset, not_consumed;
+
+ /* Optimization, skip if this state doesn't accept the character. */
+ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
+ continue;
+
+ /* Enumerate the intersection set of this state and `accepts'. */
+ has_intersec = 0;
+ for (k = 0; k < BITSET_UINTS; ++k)
+ has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
+ /* And skip if the intersection set is empty. */
+ if (!has_intersec)
+ continue;
+
+ /* Then check if this state is a subset of `accepts'.
+ remains = characters of destination j not in `accepts';
+ `accepts' is reduced to the characters not covered by j. */
+ not_subset = not_consumed = 0;
+ for (k = 0; k < BITSET_UINTS; ++k)
+ {
+ not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
+ not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
+ }
+
+ /* If this state isn't a subset of `accepts', create a
+ new group state, which has the `remains'. The new group starts
+ with a copy of j's nodes; j itself keeps only the intersection. */
+ if (not_subset)
+ {
+ bitset_copy (dests_ch[ndests], remains);
+ bitset_copy (dests_ch[j], intersec);
+ err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ }
+
+ /* Put the position in the current group. */
+ err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (BE (err < 0, 0))
+ goto error_return;
+
+ /* If all characters are consumed, go to next node. */
+ if (!not_consumed)
+ break;
+ }
+ /* Some characters remain, create a new group. */
+ if (j == ndests)
+ {
+ bitset_copy (dests_ch[ndests], accepts);
+ err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
+ if (BE (err != REG_NOERROR, 0))
+ goto error_return;
+ ++ndests;
+ bitset_empty (accepts);
+ }
+ }
+ return ndests;
+ error_return:
+ /* Release the node sets of the destinations counted so far. */
+ for (j = 0; j < ndests; ++j)
+ re_node_set_free (dests_node + j);
+ return -1;
+}
+
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+ Return the number of the bytes the node accepts, or 0 if the node
+ does not accept a multi-byte sequence at this position.
+ STR_IDX is the current index of the input string INPUT.
+
+ This function handles the nodes which can accept one character, or
+ one collating element like '.', '[a-z]', as opposed to the other
+ nodes, which can only accept one byte. */
+
+static int
+check_node_accept_bytes (dfa, node_idx, input, str_idx)
+ re_dfa_t *dfa;
+ int node_idx, str_idx;
+ const re_string_t *input;
+{
+ const re_token_t *node = dfa->nodes + node_idx;
+ int char_len, elem_len;
+ int i;
+
+ if (BE (node->type == OP_UTF8_PERIOD, 0))
+ {
+ /* C is the lead byte, D the byte currently being validated. */
+ unsigned char c = re_string_byte_at (input, str_idx), d;
+ /* Lead bytes below 0xc2 never start a multi-byte sequence here. */
+ if (BE (c < 0xc2, 1))
+ return 0;
+
+ /* At least two bytes must be available. */
+ if (str_idx + 2 > input->len)
+ return 0;
+
+ d = re_string_byte_at (input, str_idx + 1);
+ /* Two-byte sequence: only the continuation byte needs checking. */
+ if (c < 0xe0)
+ return (d < 0x80 || d > 0xbf) ? 0 : 2;
+ /* For longer sequences, derive the length from the lead byte and
+ reject a second byte that is too small for the smallest lead byte
+ of each length (presumably to refuse overlong encodings). */
+ else if (c < 0xf0)
+ {
+ char_len = 3;
+ if (c == 0xe0 && d < 0xa0)
+ return 0;
+ }
+ else if (c < 0xf8)
+ {
+ char_len = 4;
+ if (c == 0xf0 && d < 0x90)
+ return 0;
+ }
+ else if (c < 0xfc)
+ {
+ char_len = 5;
+ if (c == 0xf8 && d < 0x88)
+ return 0;
+ }
+ else if (c < 0xfe)
+ {
+ char_len = 6;
+ if (c == 0xfc && d < 0x84)
+ return 0;
+ }
+ else
+ return 0;
+
+ /* The whole sequence must fit in the input. */
+ if (str_idx + char_len > input->len)
+ return 0;
+
+ /* Every trailing byte must lie in 0x80..0xbf. */
+ for (i = 1; i < char_len; ++i)
+ {
+ d = re_string_byte_at (input, str_idx + i);
+ if (d < 0x80 || d > 0xbf)
+ return 0;
+ }
+ return char_len;
+ }
+
+ char_len = re_string_char_size_at (input, str_idx);
+ if (node->type == OP_PERIOD)
+ {
+ /* Single-byte characters are not reported by this function. */
+ if (char_len <= 1)
+ return 0;
+ /* FIXME: I don't think this if is needed, as both '\n'
+ and '\0' are char_len == 1. */
+ /* '.' accepts any one character except the following two cases. */
+ if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
+ re_string_byte_at (input, str_idx) == '\n') ||
+ ((dfa->syntax & RE_DOT_NOT_NULL) &&
+ re_string_byte_at (input, str_idx) == '\0'))
+ return 0;
+ return char_len;
+ }
+
+ elem_len = re_string_elem_size_at (input, str_idx);
+ /* Nothing to do unless the character or the collating element spans
+ more than one byte. */
+ if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
+ return 0;
+
+ if (node->type == COMPLEX_BRACKET)
+ {
+ const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
+ const unsigned char *pin = ((char *) re_string_get_buffer (input)
+ + str_idx);
+ int j;
+ uint32_t nrules;
+# endif /* _LIBC */
+ /* Number of bytes matched by the bracket; stays 0 if nothing in the
+ set matches. */
+ int match_len = 0;
+ /* The wide character is fetched only when some test below uses it. */
+ wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+ ? re_string_wchar_at (input, str_idx) : 0);
+
+ /* match with multibyte character? */
+ for (i = 0; i < cset->nmbchars; ++i)
+ if (wc == cset->mbchars[i])
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ /* match with character_class? */
+ for (i = 0; i < cset->nchar_classes; ++i)
+ {
+ wctype_t wt = cset->char_classes[i];
+ if (__iswctype (wc, wt))
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+# ifdef _LIBC
+ nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules != 0)
+ {
+ unsigned int in_collseq = 0;
+ const int32_t *table, *indirect;
+ const unsigned char *weights, *extra;
+ const char *collseqwc;
+ int32_t idx;
+ /* This #include defines a local function! */
+# include <locale/weight.h>
+
+ /* match with collating_symbol? */
+ if (cset->ncoll_syms)
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ for (i = 0; i < cset->ncoll_syms; ++i)
+ {
+ /* The first byte of the entry is its length, the bytes of the
+ collating element follow. */
+ const unsigned char *coll_sym = extra + cset->coll_syms[i];
+ /* Compare the length of input collating element and
+ the length of current collating element. */
+ if (*coll_sym != elem_len)
+ continue;
+ /* Compare each bytes. */
+ for (j = 0; j < *coll_sym; j++)
+ if (pin[j] != coll_sym[1 + j])
+ break;
+ if (j == *coll_sym)
+ {
+ /* Match if every bytes is equal. */
+ match_len = j;
+ goto check_node_accept_bytes_match;
+ }
+ }
+
+ /* Compute the collation sequence value of the input once, using
+ the wide character when the element is no longer than it, and
+ the byte sequence lookup otherwise. */
+ if (cset->nranges)
+ {
+ if (elem_len <= char_len)
+ {
+ collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
+ in_collseq = __collseq_table_lookup (collseqwc, wc);
+ }
+ else
+ in_collseq = find_collation_sequence_value (pin, elem_len);
+ }
+ /* match with range expression? */
+ for (i = 0; i < cset->nranges; ++i)
+ if (cset->range_starts[i] <= in_collseq
+ && in_collseq <= cset->range_ends[i])
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+
+ /* match with equivalence_class? */
+ if (cset->nequiv_classes)
+ {
+ const unsigned char *cp = pin;
+ /* These locals are read by the findidx function pulled in by the
+ #include above. */
+ table = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
+ weights = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+ extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+ indirect = (const int32_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
+ idx = findidx (&cp);
+ if (idx > 0)
+ for (i = 0; i < cset->nequiv_classes; ++i)
+ {
+ int32_t equiv_class_idx = cset->equiv_classes[i];
+ size_t weight_len = weights[idx];
+ /* Same weight length first, then the weight bytes. */
+ if (weight_len == weights[equiv_class_idx])
+ {
+ int cnt = 0;
+ while (cnt <= weight_len
+ && (weights[equiv_class_idx + 1 + cnt]
+ == weights[idx + 1 + cnt]))
+ ++cnt;
+ if (cnt > weight_len)
+ {
+ match_len = elem_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ }
+ }
+ else
+# endif /* _LIBC */
+ {
+ /* match with range expression? */
+ /* cmp_buf holds three one-character, NUL-terminated wide strings:
+ [0] the range start, [2] the input character, [4] the range end. */
+#if __GNUC__ >= 2
+ wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+#else
+ wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
+ cmp_buf[2] = wc;
+#endif
+ for (i = 0; i < cset->nranges; ++i)
+ {
+ cmp_buf[0] = cset->range_starts[i];
+ cmp_buf[4] = cset->range_ends[i];
+ /* start <= wc <= end in the locale's collation order. */
+ if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+ && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
+ {
+ match_len = char_len;
+ goto check_node_accept_bytes_match;
+ }
+ }
+ }
+ check_node_accept_bytes_match:
+ /* For a negated set (non_match) the result is inverted: a hit yields
+ 0, a miss consumes the longer of the element and the character. */
+ if (!cset->non_match)
+ return match_len;
+ else
+ {
+ if (match_len > 0)
+ return 0;
+ else
+ return (elem_len > char_len) ? elem_len : char_len;
+ }
+ }
+ return 0;
+}
+
+# ifdef _LIBC
+/* Look up the collation sequence value of the byte sequence MBS, which
+ is MBS_LEN bytes long, in the current LC_COLLATE locale data.
+ Returns UINT_MAX when no value is found. */
+static unsigned int
+find_collation_sequence_value (mbs, mbs_len)
+ const unsigned char *mbs;
+ size_t mbs_len;
+{
+ uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
+ if (nrules == 0)
+ {
+ /* No collation rules: only a single byte can be looked up, directly
+ in the per-byte table. */
+ if (mbs_len == 1)
+ {
+ /* No valid character. Match it as a single byte character. */
+ const unsigned char *collseq = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
+ return collseq[mbs[0]];
+ }
+ return UINT_MAX;
+ }
+ else
+ {
+ int32_t idx;
+ const unsigned char *extra = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+ /* Size of the table, taken as the distance to the next locale item. */
+ int32_t extrasize = (const unsigned char *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
+
+ /* Walk the variable-length entries one by one. IDX is advanced past
+ each field in turn; FOUND remembers whether the byte sequence of
+ the current entry equals MBS. */
+ for (idx = 0; idx < extrasize;)
+ {
+ int mbs_cnt, found = 0;
+ int32_t elem_mbs_len;
+ /* Skip the name of collating element name. */
+ idx = idx + extra[idx] + 1;
+ elem_mbs_len = extra[idx++];
+ if (mbs_len == elem_mbs_len)
+ {
+ for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
+ if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
+ break;
+ if (mbs_cnt == elem_mbs_len)
+ /* Found the entry. */
+ found = 1;
+ }
+ /* Skip the byte sequence of the collating element. */
+ idx += elem_mbs_len;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ /* Skip the wide char sequence of the collating element. */
+ /* NOTE(review): the element count is read here as a single byte
+ (extra[idx]) although the skip is in uint32_t units -- confirm
+ against the locale data layout. */
+ idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
+ /* If we found the entry, return the sequence value. */
+ if (found)
+ return *(uint32_t *) (extra + idx);
+ /* Skip the collation sequence value. */
+ idx += sizeof (uint32_t);
+ }
+ return UINT_MAX;
+ }
+}
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+
+/* Tell whether NODE can consume the single byte located at position IDX
+   of the input string held by MCTX.  The result is nonzero when the byte
+   is accepted and 0 otherwise.  */
+
+static int
+check_node_accept (mctx, node, idx)
+     const re_match_context_t *mctx;
+     const re_token_t *node;
+     int idx;
+{
+  re_dfa_t *const dfa = mctx->dfa;
+  unsigned char byte;
+
+  /* A constrained node is usable only if the context at IDX fits.  */
+  if (node->constraint)
+    {
+      unsigned int ctx = re_string_context_at (&mctx->input, idx,
+                                               mctx->eflags);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, ctx))
+        return 0;
+    }
+
+  byte = re_string_byte_at (&mctx->input, idx);
+
+  if (node->type == CHARACTER)
+    return node->opr.c == byte;
+  if (node->type == SIMPLE_BRACKET)
+    return bitset_contain (node->opr.sbcset, byte);
+
+  /* What is left is the dot, in one of its flavours.  */
+#ifdef RE_ENABLE_I18N
+  if (node->type == OP_UTF8_PERIOD)
+    {
+      /* Bytes from 0x80 up are never accepted one at a time.  */
+      if (byte >= 0x80)
+        return 0;
+    }
+  else
+#endif
+  if (node->type != OP_PERIOD)
+    return 0;
+
+  /* The dot takes any byte, except where the syntax bits say no.  */
+  if (byte == '\n' && !(dfa->syntax & RE_DOT_NEWLINE))
+    return 0;
+  if (byte == '\0' && (dfa->syntax & RE_DOT_NOT_NULL))
+    return 0;
+  return 1;
+}
+
+/* Grow the buffers of MCTX once the current ones are used up, then
+   refill them.  Returns REG_NOERROR, or the error reported by the
+   failing step.  */
+
+static reg_errcode_t
+extend_buffers (mctx)
+     re_match_context_t *mctx;
+{
+  re_string_t *input = &mctx->input;
+  reg_errcode_t status;
+
+  /* The string buffers get twice their current length.  */
+  status = re_string_realloc_buffers (input, input->bufs_len * 2);
+  if (BE (status != REG_NOERROR, 0))
+    return status;
+
+  /* The state log, when in use, follows the new buffer length.
+     XXX Nothing records the size of that array, so after a failed
+     reallocation there is no sign that it is too short.  */
+  if (mctx->state_log != NULL)
+    {
+      re_dfastate_t **grown = re_realloc (mctx->state_log, re_dfastate_t *,
+                                          input->bufs_len + 1);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->state_log = grown;
+    }
+
+  /* Finally rebuild the buffer contents.  */
+#ifdef RE_ENABLE_I18N
+  if (input->mb_cur_max > 1)
+    {
+      if (input->icase)
+        return build_wcs_upper_buffer (input);
+      build_wcs_buffer (input);
+      return REG_NOERROR;
+    }
+#endif /* RE_ENABLE_I18N */
+  if (input->icase)
+    build_upper_buffer (input);
+  else if (input->trans != NULL)
+    re_string_translate_buffer (input);
+  return REG_NOERROR;
+}
+
+
+/* Functions for matching context. */
+
+/* Set up MCTX for a match run with flags EFLAGS and room for N entries
+   in both the back-reference cache and the sub-match table.  The caller
+   hands in a zeroed MCTX, so bkref_ents, nbkref_ents and nsub_tops are
+   not touched when they would only be set to zero.  Returns REG_ESPACE
+   if an allocation fails, REG_NOERROR otherwise.  */
+
+static reg_errcode_t
+match_ctx_init (mctx, eflags, n)
+     re_match_context_t *mctx;
+     int eflags, n;
+{
+  mctx->match_last = -1;
+  mctx->eflags = eflags;
+
+  if (n > 0)
+    {
+      /* Both pointers are stored before the check, so that whichever
+         allocation succeeded stays reachable through MCTX.  */
+      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
+      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
+      if (BE (!(mctx->bkref_ents != NULL && mctx->sub_tops != NULL), 0))
+        return REG_ESPACE;
+    }
+
+  mctx->asub_tops = n;
+  mctx->abkref_ents = n;
+  mctx->max_mb_elem_len = 1;
+  return REG_NOERROR;
+}
+
+/* Drop everything in MCTX that depends on the current input: all
+   sub-match records are freed and both entry counters go back to zero.
+   To be called whenever the matcher moves the start index of the input
+   or switches to another input string.  */
+
+static void
+match_ctx_clean (mctx)
+     re_match_context_t *mctx;
+{
+  int t;
+
+  for (t = 0; t < mctx->nsub_tops; ++t)
+    {
+      re_sub_match_top_t *top = mctx->sub_tops[t];
+      int l = 0;
+
+      /* First the closing records hanging off this opening record...  */
+      while (l < top->nlasts)
+        {
+          re_sub_match_last_t *last = top->lasts[l++];
+          re_free (last->path.array);
+          re_free (last);
+        }
+      re_free (top->lasts);
+
+      /* ...then its own path, if any, and the record itself.  */
+      if (top->path)
+        {
+          re_free (top->path->array);
+          re_free (top->path);
+        }
+      free (top);
+    }
+
+  mctx->nbkref_ents = 0;
+  mctx->nsub_tops = 0;
+}
+
+/* Release every allocation owned by MCTX.  */
+
+static void
+match_ctx_free (mctx)
+     re_match_context_t *mctx;
+{
+  /* The records reachable through MCTX->SUB_TOPS must go before the
+     table that points at them.  */
+  match_ctx_clean (mctx);
+  re_free (mctx->bkref_ents);
+  re_free (mctx->sub_tops);
+}
+
+
+/* Add a new backreference entry to MCTX: back-reference node NODE,
+   seen at STR_IDX, refers to the sub-expression match FROM..TO.
+   Note that we assume that the caller never calls this function with a
+   duplicate entry, and calls it with a STR_IDX which isn't smaller than
+   any existing entry.
+   Returns REG_NOERROR on success.  Returns REG_ESPACE if the cache
+   cannot be enlarged; in that case the existing cache is left untouched
+   and still owned by MCTX.  */
+
+static reg_errcode_t
+match_ctx_add_entry (mctx, node, str_idx, from, to)
+     re_match_context_t *mctx;
+     int node, str_idx, from, to;
+{
+  if (mctx->nbkref_ents >= mctx->abkref_ents)
+    {
+      /* The cache is full: double its capacity.  */
+      struct re_backref_cache_entry* new_entry;
+      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
+                              mctx->abkref_ents * 2);
+      if (BE (new_entry == NULL, 0))
+        /* A failed reallocation leaves the old array valid.  Do not
+           free it here: MCTX->BKREF_ENTS would be left dangling and
+           match_ctx_free would free it a second time.  */
+        return REG_ESPACE;
+      mctx->bkref_ents = new_entry;
+      /* Clear the newly gained half.  */
+      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
+              sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
+      mctx->abkref_ents *= 2;
+    }
+  mctx->bkref_ents[mctx->nbkref_ents].node = node;
+  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
+  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
+  mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
+  /* Keep track of the longest sub-expression match recorded so far.  */
+  if (mctx->max_mb_elem_len < to - from)
+    mctx->max_mb_elem_len = to - from;
+  return REG_NOERROR;
+}
+
+/* Binary-search MCTX->BKREF_ENTS, which is kept ordered by str_idx, and
+   return the index of the first entry whose str_idx is not smaller than
+   STR_IDX.  The result equals MCTX->NBKREF_ENTS when there is no such
+   entry.  */
+
+static int
+search_cur_bkref_entry (mctx, str_idx)
+     re_match_context_t *mctx;
+     int str_idx;
+{
+  int lo = 0;
+  int hi = mctx->nbkref_ents;
+
+  while (lo < hi)
+    {
+      int probe = (lo + hi) / 2;
+      if (mctx->bkref_ents[probe].str_idx >= str_idx)
+        hi = probe;
+      else
+        lo = probe + 1;
+    }
+  return lo;
+}
+
+/* Reset the flag member of every back-reference entry in MCTX.  */
+static void
+match_ctx_clear_flag (mctx)
+     re_match_context_t *mctx;
+{
+  int remaining = mctx->nbkref_ents;
+
+  while (remaining-- > 0)
+    mctx->bkref_ents[remaining].flag = 0;
+}
+
+/* Record in MCTX that NODE, a node of type OP_OPEN_SUBEXP, matches at
+   STR_IDX.  Returns REG_ESPACE when memory runs out, REG_NOERROR
+   otherwise.  */
+
+static reg_errcode_t
+match_ctx_add_subtop (mctx, node, str_idx)
+     re_match_context_t *mctx;
+     int node, str_idx;
+{
+  re_sub_match_top_t *top;
+#ifdef DEBUG
+  assert (mctx->sub_tops != NULL);
+  assert (mctx->asub_tops > 0);
+#endif
+  /* Double the table when every slot is taken.  */
+  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
+    {
+      int capacity = mctx->asub_tops * 2;
+      re_sub_match_top_t **grown = re_realloc (mctx->sub_tops,
+                                               re_sub_match_top_t *,
+                                               capacity);
+      if (BE (grown == NULL, 0))
+        return REG_ESPACE;
+      mctx->asub_tops = capacity;
+      mctx->sub_tops = grown;
+    }
+
+  /* The slot is written even when the allocation fails; the counter
+     moves on only for a usable record.  */
+  top = calloc (1, sizeof (re_sub_match_top_t));
+  mctx->sub_tops[mctx->nsub_tops] = top;
+  if (BE (top == NULL, 0))
+    return REG_ESPACE;
+  top->node = node;
+  top->str_idx = str_idx;
+  ++mctx->nsub_tops;
+  return REG_NOERROR;
+}
+
+/* Record under SUBTOP, the entry of the corresponding OP_OPEN_SUBEXP,
+   that NODE, a node of type OP_CLOSE_SUBEXP, matches at STR_IDX.
+   Returns the new record, or NULL when memory runs out.  */
+
+static re_sub_match_last_t *
+match_ctx_add_sublast (subtop, node, str_idx)
+     re_sub_match_top_t *subtop;
+     int node, str_idx;
+{
+  re_sub_match_last_t *entry;
+
+  /* Enlarge the pointer array when it is full; the "+ 1" lets an
+     initially empty array grow as well.  */
+  if (BE (subtop->nlasts == subtop->alasts, 0))
+    {
+      int capacity = 2 * subtop->alasts + 1;
+      re_sub_match_last_t **grown = re_realloc (subtop->lasts,
+                                                re_sub_match_last_t *,
+                                                capacity);
+      if (BE (grown == NULL, 0))
+        return NULL;
+      subtop->alasts = capacity;
+      subtop->lasts = grown;
+    }
+
+  entry = calloc (1, sizeof (re_sub_match_last_t));
+  if (BE (entry == NULL, 0))
+    return NULL;
+
+  entry->node = node;
+  entry->str_idx = str_idx;
+  subtop->lasts[subtop->nlasts++] = entry;
+  return entry;
+}
+
+/* Fill in the sift context SCTX from the arguments.  cur_bkref and
+   cls_subexp_idx both start at -1, and the limits set starts out
+   empty.  */
+static void
+sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
+               check_subexp)
+     re_sift_context_t *sctx;
+     re_dfastate_t **sifted_sts, **limited_sts;
+     int last_node, last_str_idx, check_subexp;
+{
+  /* Members with fixed starting values.  */
+  sctx->cls_subexp_idx = -1;
+  sctx->cur_bkref = -1;
+  re_node_set_init_empty (&sctx->limits);
+
+  /* Members copied straight from the arguments.  */
+  sctx->check_subexp = check_subexp;
+  sctx->last_str_idx = last_str_idx;
+  sctx->last_node = last_node;
+  sctx->limited_states = limited_sts;
+  sctx->sifted_states = sifted_sts;
+}
diff --git a/lib/stdbool_.h b/lib/stdbool_.h
new file mode 100644
index 0000000..69f6ef7
--- /dev/null
+++ b/lib/stdbool_.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2001-2002 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _STDBOOL_H
+#define _STDBOOL_H
+
+/* ISO C 99 <stdbool.h> for platforms that lack it. */
+
+/* 7.16. Boolean type and values */
+
+/* BeOS <sys/socket.h> already #defines false 0, true 1. We use the same
+ definitions below, but temporarily we have to #undef them. */
+#ifdef __BEOS__
+# undef false
+# undef true
+#endif
+
+/* For the sake of symbolic names in gdb, define _Bool as an enum type.
+ @HAVE__BOOL@ is a placeholder in this template; presumably it is
+ replaced by 0 or 1 when the real header is generated -- TODO confirm
+ against config/stdbool.m4. The enum is only defined when the
+ placeholder evaluates to 0. */
+#ifndef __cplusplus
+# if !@HAVE__BOOL@
+typedef enum { false = 0, true = 1 } _Bool;
+# endif
+#else
+/* In C++, make _Bool an alias for bool. */
+typedef bool _Bool;
+#endif
+/* In either case `bool' then names the same type as _Bool. */
+#define bool _Bool
+
+/* The other macros must be usable in preprocessor directives. */
+#define false 0
+#define true 1
+#define __bool_true_false_are_defined 1
+
+#endif /* _STDBOOL_H */
diff --git a/lib/strerror.c b/lib/strerror.c
new file mode 100644
index 0000000..5a666d2
--- /dev/null
+++ b/lib/strerror.c
@@ -0,0 +1,52 @@
+/* strerror -- return a string corresponding to an error number.
+ This is a quickie version only intended as compatibility glue
+ for systems which predate the ANSI C definition of the function;
+ the glibc version is recommended for more general use.
+
+ Copyright (C) 1998 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+
+#ifndef HAVE_STRERROR
+
+# ifndef BOOTSTRAP
+# include <stdio.h>
+# endif
+# ifdef HAVE_STRING_H
+# include <string.h>
+# endif
+# include <errno.h>
+# undef strerror
+
+extern int sys_nerr;
+extern char *sys_errlist[];
+
+char *
+strerror(e)
+ int e;
+{
+ static char unknown_string[] =
+ "Unknown error code #xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+
+ if (0<=e && e<sys_nerr)
+ return sys_errlist[e];
+ sprintf(unknown_string+20, "%d", e);
+ return unknown_string;
+}
+
+#endif /* !HAVE_STRERROR */
diff --git a/lib/strverscmp.c b/lib/strverscmp.c
new file mode 100644
index 0000000..93d7160
--- /dev/null
+++ b/lib/strverscmp.c
@@ -0,0 +1,132 @@
+/* Compare strings while treating digits characters numerically.
+ Copyright (C) 1997, 2000, 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+#include <ctype.h>
+
+/* states: S_N: normal, S_I: comparing integral part, S_F: comparing
+ fractional parts, S_Z: idem but with leading Zeroes only */
+#define S_N 0x0
+#define S_I 0x4
+#define S_F 0x8
+#define S_Z 0xC
+
+/* result_type: CMP: return diff; LEN: compare using len_diff/diff */
+#define CMP 2
+#define LEN 3
+
+
+/* ISDIGIT differs from isdigit, as follows:
+ - Its arg may be any int or unsigned int; it need not be an unsigned char.
+ - It's guaranteed to evaluate its argument exactly once.
+ - It's typically faster.
+ POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to
+ ISDIGIT_LOCALE unless it's important to use the locale's definition
+ of `digit' even when the host does not conform to POSIX. */
+#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
+#undef __strverscmp
+#undef strverscmp
+
+#ifndef weak_alias
+# define __strverscmp strverscmp
+#endif
+
+/* Compare S1 and S2 as strings holding indices/version numbers,
+ returning less than, equal to or greater than zero if S1 is less than,
+ equal to or greater than S2 (for more info, see the texinfo doc).
+*/
+
+int
+__strverscmp (const char *s1, const char *s2)
+{
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+ unsigned char c1, c2;
+ int state;
+ int diff;
+
+ /* Each character is classified into a two-bit class, computed below as
+ (c == '0') + (ISDIGIT (c) != 0):
+ Symbol(s): others [1-9] 0 (padding)
+ Class bits: (00) x (01) d (10) 0 (11) -
+ The class is OR-ed into the low bits of the state (S_N, S_I, S_F or
+ S_Z) to index next_state, which is laid out as four rows of four. */
+ static const unsigned int next_state[] =
+ {
+ /* state x d 0 - */
+ /* S_N */ S_N, S_I, S_Z, S_N,
+ /* S_I */ S_N, S_I, S_I, S_I,
+ /* S_F */ S_N, S_F, S_F, S_F,
+ /* S_Z */ S_N, S_F, S_Z, S_Z
+ };
+
+ /* Indexed by (state | class of c1) << 2 | class of c2. An entry is
+ CMP, LEN, or the value to return directly. The entries of the
+ final -/ row for S_Z are left out of the initializer. */
+ static const int result_type[] =
+ {
+ /* state x/x x/d x/0 x/- d/x d/d d/0 d/-
+ 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */
+
+ /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
+ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+ /* S_I */ CMP, -1, -1, CMP, 1, LEN, LEN, CMP,
+ 1, LEN, LEN, CMP, CMP, CMP, CMP, CMP,
+ /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
+ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+ /* S_Z */ CMP, 1, 1, CMP, -1, CMP, CMP, CMP,
+ -1, CMP, CMP, CMP
+ };
+
+ /* The same pointer trivially compares equal. */
+ if (p1 == p2)
+ return 0;
+
+ c1 = *p1++;
+ c2 = *p2++;
+ /* Hint: '0' is a digit too. */
+ state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0));
+
+ /* Walk the common prefix, tracking the state as we go. */
+ while ((diff = c1 - c2) == 0 && c1 != '\0')
+ {
+ state = next_state[state];
+ c1 = *p1++;
+ c2 = *p2++;
+ state |= (c1 == '0') + (ISDIGIT (c1) != 0);
+ }
+
+ /* From here on `state' holds the result type, not a scanner state. */
+ state = result_type[state << 2 | ((c2 == '0') + (ISDIGIT (c2) != 0))];
+
+ switch (state)
+ {
+ case CMP:
+ return diff;
+
+ case LEN:
+ /* Both strings continue with digits: the longer digit run wins;
+ with runs of equal length the first differing characters decide. */
+ while (ISDIGIT (*p1++))
+ if (!ISDIGIT (*p2++))
+ return 1;
+
+ return ISDIGIT (*p2) ? -1 : diff;
+
+ default:
+ /* The table entry is itself the return value. */
+ return state;
+ }
+}
+#ifdef weak_alias
+weak_alias (__strverscmp, strverscmp)
+#endif
diff --git a/lib/strverscmp.h b/lib/strverscmp.h
new file mode 100644
index 0000000..bb1ea1b
--- /dev/null
+++ b/lib/strverscmp.h
@@ -0,0 +1,20 @@
+/* strverscmp.h -- compare strings holding indices/version numbers */
+
+#ifndef STRVERSCMP_H_ /* include guard */
+# define STRVERSCMP_H_
+
+# if HAVE_CONFIG_H
+# include <config.h>
+# endif
+
+# ifndef PARAMS /* PARAMS(Args) expands to the prototype argument list, or to () when prototypes are unavailable */
+# if defined PROTOTYPES || (defined __STDC__ && __STDC__)
+# define PARAMS(Args) Args
+# else
+# define PARAMS(Args) ()
+# endif
+# endif
+
+int strverscmp PARAMS ((const char*, const char*)); /* compare two strings holding indices/version numbers */
+
+#endif /* not STRVERSCMP_H_ */
diff --git a/lib/utils.c b/lib/utils.c
new file mode 100644
index 0000000..dbabebb
--- /dev/null
+++ b/lib/utils.c
@@ -0,0 +1,520 @@
+/* Functions from hack's utils library.
+ Copyright (C) 1989, 1990, 1991, 1998, 1999, 2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+
+#include <stdio.h>
+
+#include <errno.h>
+#ifndef errno
+ extern int errno;
+#endif
+
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#else
+# include <string.h>
+#endif /* HAVE_STRINGS_H */
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif /* HAVE_STDLIB_H */
+
+#include "utils.h"
+
+const char *myname;
+
+/* Store information about files opened with ck_fopen
+ so that error messages from ck_fread, ck_fwrite, etc. can print the
+ name of the file that had the error */
+
+struct open_file
+ {
+ FILE *fp; /* the stdio stream being tracked */
+ char *name; /* heap copy of the file name, made with ck_strdup */
+ struct open_file *link; /* next record in the open_files list */
+ unsigned temp : 1; /* set by ck_mkstemp, cleared by ck_fopen; panic() unlinks files that have it set */
+ };
+
+static struct open_file *open_files = NULL;
+static void do_ck_fclose P_((FILE *fp));
+
+/* Print "MYNAME: ", the printf-style message STR with its arguments,
+   and a newline on stderr; then remove every temporary file that was
+   registered with the `temp' flag and terminate with exit status 4.
+   This function never returns.  */
+#if !defined __STDC__ || !(__STDC__-0)
+# include <varargs.h>
+# define VSTART(l,a) va_start(l)
+void
+panic(str, va_alist)
+  char *str;
+  va_dcl
+#else /*__STDC__*/
+# include <stdarg.h>
+# define VSTART(l,a) va_start(l, a)
+void
+panic(const char *str, ...)
+#endif /* __STDC__ */
+{
+  va_list iggy;
+
+  fprintf(stderr, "%s: ", myname);
+  VSTART(iggy, str);
+#ifndef HAVE_VPRINTF
+# ifndef HAVE_DOPRNT
+  fputs(str, stderr); /* not great, but perhaps better than nothing... */
+# else /* HAVE_DOPRNT */
+  _doprnt(str, &iggy, stderr);
+# endif /* HAVE_DOPRNT */
+#else /* HAVE_VPRINTF */
+  vfprintf(stderr, str, iggy);
+#endif /* HAVE_VPRINTF */
+  va_end(iggy);
+  putc('\n', stderr);
+
+  /* Unlink the temporary files. */
+  while (open_files)
+    {
+      if (open_files->temp)
+        {
+          /* A record may carry a NULL stream if the file could be
+             created but no stdio stream could be attached to it. */
+          if (open_files->fp)
+            fclose (open_files->fp);
+
+          errno = 0;
+          unlink (open_files->name);
+          if (errno != 0)
+            {
+              fprintf (stderr, _("cannot remove %s: %s"), open_files->name, strerror (errno));
+              /* Terminate the diagnostic so that several of them (and the
+                 shell prompt) do not run together on one line. */
+              putc ('\n', stderr);
+            }
+        }
+
+      open_files = open_files->link;
+    }
+
+  exit(4);
+}
+
+
+/* Translate a stdio stream into a printable name: the name recorded in
+   open_files when the stream is tracked there, otherwise the usual name
+   of a standard stream, otherwise "<unknown>". */
+static const char *utils_fp_name P_((FILE *fp));
+static const char *
+utils_fp_name(fp)
+  FILE *fp;
+{
+  struct open_file *entry = open_files;
+
+  while (entry)
+    {
+      if (entry->fp == fp)
+        return entry->name;
+      entry = entry->link;
+    }
+
+  if (fp == stdin)
+    return "stdin";
+  if (fp == stdout)
+    return "stdout";
+  if (fp == stderr)
+    return "stderr";
+  return "<unknown>";
+}
+
+/* fopen() wrapper that records the stream in open_files.
+   When the open fails, panic if FAIL is set, otherwise return NULL.
+   If the returned FILE pointer is already on the list, its record is
+   reused and only the stored name is replaced. */
+FILE *
+ck_fopen(name, mode, fail)
+  const char *name;
+  const char *mode;
+  bool fail;
+{
+  struct open_file *rec;
+  FILE *stream = fopen (name, mode);
+
+  if (stream == NULL)
+    {
+      if (fail)
+        panic(_("couldn't open file %s: %s"), name, strerror(errno));
+
+      return NULL;
+    }
+
+  /* Look for an existing record that carries this very stream. */
+  rec = open_files;
+  while (rec != NULL && rec->fp != stream)
+    rec = rec->link;
+
+  if (rec != NULL)
+    {
+      FREE(rec->name);
+    }
+  else
+    {
+      rec = MALLOC(1, struct open_file);
+      rec->link = open_files;
+      open_files = rec;
+    }
+
+  rec->name = ck_strdup(name);
+  rec->fp = stream;
+  rec->temp = false;
+  return stream;
+}
+
+/* Create a uniquely named temporary file "TMPDIR/BASEXXXXXX" with
+   mkstemp() and open it for writing.
+
+   TMPDIR may be NULL, in which case $TMPDIR, then $TMP, then P_tmpdir
+   (or "/tmp" when P_tmpdir is not defined) is used.  The heap-allocated
+   file name is stored in *P_FILENAME.  The stream is registered in
+   open_files with the `temp' flag set, so panic() removes the file.
+
+   Panics if the file cannot be created or if no stdio stream can be
+   attached to its descriptor; never returns NULL. */
+FILE *
+ck_mkstemp (p_filename, tmpdir, base)
+  char **p_filename;
+  char *base, *tmpdir;
+{
+  char *template;
+  FILE *fp;
+  int fd;
+  struct open_file *p;
+
+  if (tmpdir == NULL)
+    tmpdir = getenv("TMPDIR");
+  if (tmpdir == NULL)
+    {
+      tmpdir = getenv("TMP");
+      if (tmpdir == NULL)
+#ifdef P_tmpdir
+        tmpdir = P_tmpdir;
+#else
+        tmpdir = "/tmp";
+#endif
+    }
+
+  /* 8 extra bytes: the '/' separator, "XXXXXX" and the terminating NUL. */
+  template = xmalloc (strlen (tmpdir) + strlen (base) + 8);
+  sprintf (template, "%s/%sXXXXXX", tmpdir, base);
+
+  fd = mkstemp (template);
+  if (fd == -1)
+    panic(_("couldn't open temporary file %s: %s"), template, strerror(errno));
+
+  fp = fdopen (fd, "w");
+  if (fp == NULL)
+    {
+      /* The file exists but is not registered yet, so panic() would not
+         clean it up: remove it here, keeping fdopen's errno for the
+         message. */
+      int saved_errno = errno;
+      unlink (template);
+      panic(_("couldn't open temporary file %s: %s"), template, strerror(saved_errno));
+    }
+
+  *p_filename = template;
+
+  p = MALLOC(1, struct open_file);
+  p->name = ck_strdup (template);
+  p->fp = fp;
+  p->temp = true;
+  p->link = open_files;
+  open_files = p;
+  return fp;
+}
+
+/* fwrite() wrapper: write NMEMB items of SIZE bytes from PTR to STREAM
+   and panic if fewer than NMEMB items were written.  Nothing is written
+   when SIZE is 0. */
+void
+ck_fwrite(ptr, size, nmemb, stream)
+  const VOID *ptr;
+  size_t size;
+  size_t nmemb;
+  FILE *stream;
+{
+  clearerr(stream);
+  if (size && fwrite(ptr, size, nmemb, stream) != nmemb)
+    /* The message uses %d, so the count must be passed as an int:
+       handing a size_t to a variadic function for %d is undefined where
+       the two types differ in width.  The format strings are left
+       untouched so existing translations keep matching. */
+    panic(ngettext("couldn't write %d item to %s: %s",
+                   "couldn't write %d items to %s: %s", nmemb),
+          (int) nmemb, utils_fp_name(stream), strerror(errno));
+}
+
+/* fread() wrapper: return the number of items read, panicking when
+   nothing was read and the stream reports an error.  With SIZE equal
+   to 0 no read is attempted and NMEMB is returned unchanged. */
+size_t
+ck_fread(ptr, size, nmemb, stream)
+  VOID *ptr;
+  size_t size;
+  size_t nmemb;
+  FILE *stream;
+{
+  size_t got;
+
+  clearerr(stream);
+  if (size == 0)
+    return nmemb;
+
+  got = fread(ptr, size, nmemb, stream);
+  if (got == 0 && ferror(stream))
+    panic(_("read error on %s: %s"), utils_fp_name(stream), strerror(errno));
+
+  return got;
+}
+
+/* getline() wrapper: read one line from STREAM into *TEXT (capacity
+   *BUFLEN) and return getline's result.  Panics when the stream's error
+   indicator is set, either on entry (no read is attempted then) or
+   after the read. */
+size_t
+ck_getline(text, buflen, stream)
+  char **text;
+  size_t *buflen;
+  FILE *stream;
+{
+  int nread;
+
+  if (!ferror (stream))
+    {
+      nread = getline (text, buflen, stream);
+      if (!ferror (stream))
+        return nread;
+    }
+
+  panic (_("read error on %s: %s"), utils_fp_name(stream), strerror(errno));
+  return 0; /* not reached: panic() exits */
+}
+
+/* fflush() wrapper: panic when flushing STREAM fails, except that a
+   failure with errno == EBADF is tolerated. */
+void
+ck_fflush(stream)
+  FILE *stream;
+{
+  int rc;
+
+  clearerr(stream);
+  rc = fflush(stream);
+  if (rc != EOF)
+    return;
+  if (errno == EBADF)
+    return;
+
+  panic("couldn't flush %s: %s", utils_fp_name(stream), strerror(errno));
+}
+
+/* Close STREAM and drop its record from open_files, panicking if the
+   close fails.  A NULL STREAM means: close every tracked file, and then
+   stdout and stderr as well. */
+void
+ck_fclose(stream)
+  FILE *stream;
+{
+  /* LINKP always points at the link field (or at open_files itself)
+     that leads to CUR.  Unlinking through it keeps open_files valid at
+     every step, which matters because do_ck_fclose() may call panic(),
+     and panic() and utils_fp_name() both walk open_files. */
+  struct open_file **linkp = &open_files;
+  struct open_file *cur;
+
+  /* a NULL stream means to close all files */
+  while ( (cur = *linkp) != NULL )
+    {
+      if (!stream || stream == cur->fp)
+        {
+          /* Close while the record is still on the list, so that an
+             error message can name the file. */
+          do_ck_fclose (cur->fp);
+          *linkp = cur->link;
+          FREE(cur->name);
+          FREE(cur);
+        }
+      else
+        linkp = &cur->link;
+    }
+
+  /* Also care about stdout, because if it is redirected the
+     last output operations might fail and it is important
+     to signal this as an error (perhaps to make). */
+  if (!stream)
+    {
+      do_ck_fclose (stdout);
+      do_ck_fclose (stderr);
+    }
+}
+
+/* Close a single file: flush FP through ck_fflush(), then fclose() it,
+   panicking if the close fails.  Does not touch open_files. */
+static void
+do_ck_fclose(fp)
+  FILE *fp;
+{
+  ck_fflush(fp);
+  clearerr(fp);
+
+  if (fclose(fp) == EOF)
+    panic("couldn't close %s: %s", utils_fp_name(fp), strerror(errno));
+}
+
+
+/* rename() wrapper that panics on failure.  When UNLINK_IF_FAIL is not
+   NULL, that file is removed before the rename failure is reported. */
+void
+ck_rename (from, to, unlink_if_fail)
+  const char *from, *to;
+  const char *unlink_if_fail;
+{
+  int rename_errno;
+
+  if (rename (from, to) != -1)
+    return;
+
+  if (unlink_if_fail != NULL)
+    {
+      rename_errno = errno;
+
+      errno = 0;
+      unlink (unlink_if_fail);
+
+      /* Not being able to remove the temporary file is the more severe
+         problem, so it is the one reported if it happens. */
+      if (errno != 0)
+        panic (_("cannot remove %s: %s"), unlink_if_fail, strerror (errno));
+
+      /* Put back the reason why rename() failed for the message below. */
+      errno = rename_errno;
+    }
+
+  panic (_("cannot rename %s: %s"), from, strerror (errno));
+}
+
+
+
+
+/* Allocate SIZE zero-filled bytes (at least one byte even when SIZE is
+   0) and panic if the allocation fails. */
+VOID *
+ck_malloc(size)
+  size_t size;
+{
+  VOID *block;
+
+  if (size == 0)
+    size = 1;
+
+  block = calloc(1, size);
+  if (block == NULL)
+    panic("couldn't allocate memory");
+  return block;
+}
+
+/* Conventional name for ck_malloc(): same arguments, same result. */
+VOID *
+xmalloc(size)
+  size_t size;
+{
+  VOID *block = ck_malloc(size);
+  return block;
+}
+
+/* Resize PTR to SIZE bytes, panicking on failure.  A SIZE of 0 releases
+   PTR and yields NULL; a NULL PTR is handled by ck_malloc(). */
+VOID *
+ck_realloc(ptr, size)
+  VOID *ptr;
+  size_t size;
+{
+  if (size != 0)
+    {
+      VOID *grown;
+
+      if (ptr == NULL)
+        return ck_malloc(size);
+
+      grown = realloc(ptr, size);
+      if (grown == NULL)
+        panic("couldn't re-allocate memory");
+      return grown;
+    }
+
+  FREE(ptr);
+  return NULL;
+}
+
+/* Duplicate the NUL-terminated string STR into freshly allocated
+   storage obtained through MALLOC. */
+char *
+ck_strdup(str)
+  const char *str;
+{
+  char *copy = MALLOC(strlen(str)+1, char);
+
+  strcpy(copy, str);
+  return copy;
+}
+
+/* Duplicate the LEN bytes starting at BUF into storage obtained from
+   ck_malloc(). */
+VOID *
+ck_memdup(buf, len)
+  const VOID *buf;
+  size_t len;
+{
+  VOID *copy = ck_malloc(len);
+
+  memcpy(copy, buf, len);
+  return copy;
+}
+
+/* free() wrapper that does nothing for a NULL pointer. */
+void
+ck_free(ptr)
+  VOID *ptr;
+{
+  if (ptr == NULL)
+    return;
+
+  free(ptr);
+}
+
+
+/* Implement a variable sized buffer of `stuff'. We don't know what it is,
+nor do we care, as long as it doesn't mind being aligned by malloc. */
+
+struct buffer
+ {
+ size_t allocated; /* number of bytes currently allocated for b */
+ size_t length; /* number of bytes of b in use */
+ char *b; /* the storage itself */
+ };
+
+#define MIN_ALLOCATE 50 /* size of the initial allocation made by init_buffer */
+
+/* Create an empty buffer with MIN_ALLOCATE bytes of initial storage. */
+struct buffer *
+init_buffer()
+{
+  struct buffer *fresh = MALLOC(1, struct buffer);
+
+  fresh->length = 0;
+  fresh->allocated = MIN_ALLOCATE;
+  fresh->b = MALLOC(MIN_ALLOCATE, char);
+  return fresh;
+}
+
+/* Return the start of B's storage. */
+char *
+get_buffer(b)
+  struct buffer *b;
+{
+  char *start = b->b;
+  return start;
+}
+
+/* Return the number of bytes currently stored in B. */
+size_t
+size_buffer(b)
+  struct buffer *b;
+{
+  size_t used = b->length;
+  return used;
+}
+
+/* Make sure B can hold at least NEWLEN bytes.  Doubling the current
+   allocation is tried first when that is more than NEWLEN; if that is
+   not possible, exactly NEWLEN bytes are requested through REALLOC. */
+static void resize_buffer P_((struct buffer *b, size_t newlen));
+static void
+resize_buffer(b, newlen)
+  struct buffer *b;
+  size_t newlen;
+{
+  size_t doubled;
+  char *grown;
+
+  if (newlen <= b->allocated)
+    return;
+
+  doubled = b->allocated * 2;
+  if (newlen < doubled)
+    {
+      /* Plain realloc() on purpose, not the REALLOC() macro: a failure
+         here just falls through to the exact-size request below. */
+      grown = realloc(b->b, doubled);
+      if (grown)
+        {
+          b->allocated = doubled;
+          b->b = grown;
+          return;
+        }
+    }
+
+  grown = REALLOC(b->b, newlen, char);
+  b->allocated = newlen;
+  b->b = grown;
+}
+
+/* Append the N bytes at P to B, growing B when needed, and return a
+   pointer to where the copy was placed inside B. */
+char *
+add_buffer(b, p, n)
+  struct buffer *b;
+  const char *p;
+  size_t n;
+{
+  char *dest;
+  size_t room = b->allocated - b->length;
+
+  if (room < n)
+    resize_buffer(b, b->length+n);
+
+  dest = b->b + b->length;
+  memcpy(dest, p, n);
+  b->length += n;
+  return dest;
+}
+
+/* Append the single character C to B and return a pointer to it.
+   Nothing is stored and NULL is returned when C is EOF. */
+char *
+add1_buffer(b, c)
+  struct buffer *b;
+  int c;
+{
+  char *slot;
+
+  /* Deliberately not written in terms of add_buffer(): this path is
+   * called very often and must stay cheap, and even a "builtin"
+   * memcpy(a, b, 1) can become expensive at that rate.
+   */
+  if (c == EOF)
+    return NULL;
+
+  if (b->allocated - b->length < 1)
+    resize_buffer(b, b->length+1);
+
+  slot = b->b + b->length;
+  b->length++;
+  *slot = c;
+  return slot;
+}
+
+/* Release B's storage (when B is not NULL) and then B itself. */
+void
+free_buffer(b)
+  struct buffer *b;
+{
+  if (b != NULL)
+    {
+      FREE(b->b);
+    }
+
+  FREE(b);
+}
diff --git a/lib/utils.h b/lib/utils.h
new file mode 100644
index 0000000..921795d
--- /dev/null
+++ b/lib/utils.h
@@ -0,0 +1,48 @@
+/* Functions from hack's utils library.
+ Copyright (C) 1989, 1990, 1991, 1998, 1999, 2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include <stdio.h>
+
+#include "basicdefs.h"
+
+void panic P_((const char *str, ...)); /* print a printf-style message on stderr and exit with status 4 */
+
+FILE *ck_fopen P_((const char *name, const char *mode, bool fail)); /* fopen; on failure panics if FAIL, else returns NULL */
+void ck_fwrite P_((const VOID *ptr, size_t size, size_t nmemb, FILE *stream)); /* fwrite; panics on a short write */
+size_t ck_fread P_((VOID *ptr, size_t size, size_t nmemb, FILE *stream)); /* fread; panics on a read error */
+void ck_fflush P_((FILE *stream)); /* fflush; panics on failure unless errno is EBADF */
+void ck_fclose P_((FILE *stream)); /* close STREAM; NULL closes all tracked files plus stdout and stderr */
+size_t ck_getline P_((char **text, size_t *buflen, FILE *stream)); /* getline; panics when the stream is in error */
+FILE * ck_mkstemp P_((char **p_filename, char *tmpdir, char *base)); /* create and open a temporary file; its name goes to *P_FILENAME */
+void ck_rename P_((const char *from, const char *to, const char *unlink_if_fail)); /* rename; on failure removes UNLINK_IF_FAIL (if not NULL) and panics */
+
+VOID *ck_malloc P_((size_t size)); /* zero-filled allocation; panics on failure */
+VOID *xmalloc P_((size_t size)); /* same as ck_malloc */
+VOID *ck_realloc P_((VOID *ptr, size_t size)); /* realloc; SIZE 0 frees PTR and returns NULL; panics on failure */
+char *ck_strdup P_((const char *str)); /* allocated copy of a string */
+VOID *ck_memdup P_((const VOID *buf, size_t len)); /* allocated copy of LEN bytes */
+void ck_free P_((VOID *ptr)); /* free; NULL is ignored */
+
+struct buffer *init_buffer P_((void)); /* new empty growable buffer */
+char *get_buffer P_((struct buffer *b)); /* start of the buffer's storage */
+size_t size_buffer P_((struct buffer *b)); /* number of bytes stored */
+char *add_buffer P_((struct buffer *b, const char *p, size_t n)); /* append N bytes; returns where they were stored */
+char *add1_buffer P_((struct buffer *b, int ch)); /* append one character; returns NULL and stores nothing for EOF */
+void free_buffer P_((struct buffer *b)); /* release the buffer and its storage */
+
+extern const char *myname; /* program name printed by panic() before each message */
diff --git a/po/ChangeLog b/po/ChangeLog
new file mode 100644
index 0000000..a8b3942
--- /dev/null
+++ b/po/ChangeLog
@@ -0,0 +1,45 @@
+2002-10-26 Paolo Bonzini <bonzini@gnu.org>
+
+ * po/*.po: updated from Translation Project
+
+2001-10-19 gettextize <bug-gnu-utils@gnu.org>
+
+ * Makefile.in.in: Upgrade to gettext-0.10.40.
+ * cat-id-tbl.c: Remove file.
+ * stamp-cat-id: Remove file.
+
+2001-03-02 Paolo Bonzini <bonzini@gnu.org>
+
+ * po/it.po: updated
+ * po/sed.pot: likewise.
+
+2000-12-10 Paolo Bonzini <bonzini@gnu.org>
+
+ * po/it.po: Italian translation reviewed for new POT file.
+ * po/sed.pot: updated
+
+Mon Mar 15 16:25:53 PST 1999 Ken Pizzini <ken@gnu.org>
+
+ * po/it.po: new translation file.
+
+Sun Feb 7 21:22:17 PST 1999 Ken Pizzini <ken@gnu.org>
+
+ * po/de.po: new translation file.
+
+Sat Dec 12 11:18:55 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * po/ru.po: new translation file.
+
+Sun Dec 6 00:51:23 PST 1998 Ken Pizzini <ken@gnu.org>
+
+ * po/fr.po: new translation file.
+
+Sun Aug 16 02:59:20 PDT 1998 Ken Pizzini <ken@gnu.org>
+
+ * sed/compile.c: added N_() markers and corresponding gettext()
+ (er, _()) calls.
+ * po/sed.pot: updated to reflect changed and newly marked text.
+
+1998-07-24 Erick Branderhorst <Erick.Branderhorst@asml.nl>
+
+ * po/nl.po: Dutch translation.
diff --git a/po/POTFILES.in b/po/POTFILES.in
new file mode 100644
index 0000000..3c06d1e
--- /dev/null
+++ b/po/POTFILES.in
@@ -0,0 +1,8 @@
+sed/compile.c
+sed/execute.c
+sed/regexp.c
+sed/sed.c
+lib/utils.c
+lib/regcomp.c
+lib/regexec.c
+lib/regex_internal.c
diff --git a/po/af.po b/po/af.po
new file mode 100644
index 0000000..845b2c2
--- /dev/null
+++ b/po/af.po
@@ -0,0 +1,433 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) 2004 Free Software Foundation, Inc.
+# This file is distributed under the same license as the sed package.
+# Ysbeer <ysbeer@af.org.za>, 2004
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.9\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-01-11 21:06+0000\n"
+"Last-Translator: Ysbeer <ysbeer@af.org.za>\n"
+"Language-Team: Afrikaans <i18n@af.org.za>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n!=1;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Meervoudige `!'s"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Onverwagte `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Kan nie +N or ~N as die eerste adres gebruik nie"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Ongepaarde `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Onverwagte `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Ekstra karakters na instruksie"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Het \\ na `a', `c' or `i' verwag"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' soek nie 'n adres nie"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": soek nie 'n adres nie"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Kommentare aanvaar nie adresse nie"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Vermiste instruksie"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Instruksie gebruik slegs een adres"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Ongetermineerde adresregex"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Ongetermineerde `s' instruksie"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Ongetermineerde `y' instruksie"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Onbekende opsie vir `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "meervoudige `p' opsies vir `s' instruksie"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "meervoudige `g' opsies vir `s' instruksie"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "meervoudige nommeropsies vir `s' instruksie"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "nommeropsie vir `s' instruksie mag nie nul wees nie"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "stringe vir y-instruksie het verskillende lengtes"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "het nuwer sed-weergawe verwag"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Instruksie gebruik slegs een adres"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Onbekende instruksie:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: lêer %s lyn %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e uitdrukking #%lu, karakter %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Kan nie etiket vir sprong na `%s' kry nie"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: Kan nie %s lees nie: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Kon nie die lêer %s oopmaak nie: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Kon nie tydelike lêer %s oopmaak nie: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "fout in subproses"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opsie `e' word nie ondersteun nie"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "`e' instruksie word nie ondersteun nie"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Geen vorige regex nie"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Kan nie veranderaars vir leë regex spesifiseer nie"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Ongeldige regterhandsverwysing \\%d vir `s' instruksie"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" gebruik Perl 5 se regexsintaks in die skrip.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" gebruik Perl 5 se regexsintaks in die skrip.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Rapporteer foute per e-pos aan: %s .\n"
+"Sluit asb. die woord ``%s'' êrens in die onderwerplyn in.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed weergawe %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"gebaseer op GNU sed weergawe 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed weergawe %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Hierdie is vrye sagteware; raadpleeg die bronkode vir kopiëringsinligting. "
+"Daar is GEEN\n"
+"waarborg nie; nie eens vir BRUIKBAARHEID of GESKIKHEID VIR 'n SPESIFIEK "
+"DOEL,\n"
+"binne regsperke nie.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: Kan nie %s lees nie: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Kon nie die lêer %s oopmaak nie: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "kon nie %d item na %s skryf nie: %s"
+msgstr[1] "kon nie %d items na %s skryf nie: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "leesfout op %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: Kan nie %s lees nie: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sukses"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Geen paring"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Ongeldige regex"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Ongeldige kollasiekarakter"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Ongeldige karakterklasnaam"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Sleep terugstreep"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Ongeldige terugverwysing"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Ongepaarde [ of [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Ongepaarde ( or \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Ongepaarde \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ongeldige inhoud binne \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Ongeldige bereikseinde"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Geheue uitgeput"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Ongeldige vorige regex"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Premature einde vir regex"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regex te groot"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Ongepaarde ) of \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Geen vorige regex nie"
diff --git a/po/ca.po b/po/ca.po
new file mode 100644
index 0000000..69094a1
--- /dev/null
+++ b/po/ca.po
@@ -0,0 +1,446 @@
+# Catalan translation of sed.
+# Copyright © 2002, 2003, 2004 Free Software Foundation, Inc.
+# This file is distributed under the same license as the sed package.
+# Jordi Mallach <jordi@gnu.org>, 2002, 2003, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-10 05:51+0200\n"
+"Last-Translator: Jordi Mallach <jordi@gnu.org>\n"
+"Language-Team: Catalan <ca@dodds.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=n!=1;\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "múltiples «!»"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "«,» inesperada"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "no es pot utilitzar +N o ~N com a primera adreça"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "«{» no emparellat"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "«}» inesperat"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "hi ha caràcters extra després de l'ordre"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "s'espera \\ després de «a», «c» i «i»"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "«}» no accepta cap adreça"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": no accepta cap adreça"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "els comentaris no accepten cap adreça"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "cal una ordre"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "l'ordre utilitza només una adreça"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "l'expressió regular d'adreça no està terminada"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "ordre «s» no terminada"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "ordre «y» no terminada"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "opció desconeguda per a «s»"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "múltiples opcions «p» per a l'ordre «s»"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "múltiples opcions «g» per a l'ordre «s»"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "múltiples opcions numèriques per a l'ordre «s»"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "l'opció numèrica per a l'ordre «s» no pot ser zero"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "les cadenes per a l'ordre «y» són de longituds diferents"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "el caràcter delimitador no és un caràcter d'un byte"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "s'esperava una versió més nova de sed"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "ús de l'adreça de línia 0 invàlid"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "ordre desconeguda: «%c»"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fitxer %s línia %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expressió #%lu, caràcter %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "no es troba l'etiqueta per al salt a «%s»"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: no es pot llegir %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "no s'ha pogut editar %s: és un terminal"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "no s'ha pogut editar %s: no és un fitxer regular"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "no s'ha pogut obrir el fitxer temporal %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "s'ha produït un error en el subprocés"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "l'opció «e» no està suportada"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "l'ordre «e» no està suportada"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "no hi ha una expressió regular prèvia"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "no es poden especificar modificadors en expregs buides"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "referència \\%d no vàlida en el costat dret de l'ordre «s»"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usa la sintaxi d'expressions regulars de Perl 5 en aquesta\n"
+" sequència.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Forma d'ús: %s [OPCIÓ]... {script-només-si-no-hi-ha-altres-scripts}\n"
+" [fitxer-entrada]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" suprimeix la impressió automàtica de l'espai de patrons\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e script, --expression=script\n"
+" afegeix el script a les ordres a executar\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f fitxer-script, --file=fitxer-script\n"
+" afegeix els continguts de fitxer-script a les ordres a "
+"executar\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[SUFIX], --in-place[=SUFIX]\n"
+" edita els mateixos fitxers (fa còpia de seguretat si es\n"
+" proveeix una extensió)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" especifica la longitud desitjada per a l'ajust de final de\n"
+" línia per a l'ordre «l»\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" inhabilita totes les extensions GNU.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" usa expressions regulars exteses en el script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" considera els fitxers com independents, en compte d'un\n"
+" llarg flux continu.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" carrega una quantitat mínima de dades dels fitxers "
+"d'entrada\n"
+" i buida els búfers d'eixida més sovint\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help mostra aquest missatge d'ajuda i surt\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version mostra la informació de la versió i surt\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Si no es donen cap de les opcions -e, --expression, -f o --file, el primer\n"
+"argument no-opció es pren com el script sed a interpretar. Tots els "
+"arguments\n"
+"restants són noms de fitxers d'entrada; si no s'especifiquen fitxers "
+"d'entrada,\n"
+"es llegeix l'entrada estàndard.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Envieu informes d'error a: %s.\n"
+"Assegureu-vos d'incloure la paraula «%s» en alguna part del camp "
+"«Assumpte:».\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versió %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"basat en GNU sed versió %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versió %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Aquest és programari lliure; vegeu el codi font per les condicions\n"
+"de còpia. No hi ha CAP garantia; ni tan sols de COMERCIABILITAT o\n"
+"ADEQUACIÓ A UN PROPÒSIT PARTICULAR, fins on ho permeta la llei.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "no es pot eliminar %s: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "no s'ha pogut obrir el fitxer %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "no s'ha pogut escriure %d element a %s: %s"
+msgstr[1] "no s'han pogut escriure %d elements a %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "error de lectura en %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "no es pot reanomenar %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Èxit"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Sense parella"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expressió regular no vàlida"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "El caràcter de comparació no és vàlid"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nom de classe de caràcter no vàlid"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Barra invertida al final"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "La referència cap enrere no és vàlida"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ o [^ no emparellat"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "«(» o \\( no emparellat"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ no emparellat"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "El contingut de \\{\\} no és vàlid"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "El rang final no és vàlid"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memòria exhaurida"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "L'expressió regular precedent no és vàlida"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Fi prematur de l'expressió regular"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "L'expressió regular és massa gran"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") o \\) no emparellat"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "No hi ha una expressió regular prèvia"
diff --git a/po/cs.po b/po/cs.po
new file mode 100644
index 0000000..f041a1f
--- /dev/null
+++ b/po/cs.po
@@ -0,0 +1,425 @@
+# Czech translations for GNU sed package.
+# Copyright (C) 1998 Free Software Foundation, Inc.
+# Jaroslav Fojtik <fojtik@cmp.felk.cvut.cz>, 1998.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 3.02.80\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2001-08-05 19:52+02:00\n"
+"Last-Translator: Vladimir Michl <Vladimir.Michl@seznam.cz>\n"
+"Language-Team: Czech <cs@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Vícenásobný `!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Neočekávaná `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "+N nebo ~N nelze použít jako první adresu"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Neodpovídající `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Neočekávaná `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Nadbytečné znaky po příkazu"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' nevyžaduje jakoukoliv adresu"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": nechce jakoukoliv adresu"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "V komentáři není přípustná jakákoliv adresa"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Chybějící příkaz"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Příkaz používá pouze jedinou adresu"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Neukončená adresa regulárního výrazu"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Neukončený příkaz `s'"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Neukončený příkaz `y'"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Neznámý přepínač pro `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "vícenásobné použití přepínače `p' s příkazem `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "vícenásobné použití přepínače `g' s příkazem `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "příkaz `s' může mít maximálně jednu číselnou volbu"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "číselná volba příkazu `s' nemůže být nula"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "řetězce pro příkaz `y' musí být stejně dlouhé"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Chybné použití adresy modifikátoru"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Neznámý příkaz:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: soubor %s, řádek %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e výraz #%lu, znak %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Návěští pro skok na `%s' nelze najít"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s nelze číst: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Soubor %s nelze otevřít"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Soubor %s nelze otevřít"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr ""
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Chyby v programu oznamujte na adrese: %s (anglicky).\n"
+"Kamkoliv do položky ``Subject:'' vložte ``%s''.\n"
+"Připomínky k překladu zasílejte na adresu <cs@li.org> (česky).\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+" %s\n"
+" Toto je volné programové vybavení; podmínky pro kopírování a rozšiřování\n"
+"naleznete ve zdrojových textech. Toto programové vybavení je zcela BEZ "
+"ZÁRUKY,\n"
+"a to i bez záruky PRODEJNOSTI nebo VHODNOSTI PRO NĚJAKÝ KONKRÉTNÍ ÚČEL.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s nelze číst: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Soubor %s nelze otevřít"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%d položek nelze do %s zapsat: %s"
+msgstr[1] "%d položek nelze do %s zapsat: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "chyba při čtení z %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s nelze číst: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr ""
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "Neodpovídající `{'"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "Neodpovídající `{'"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "Neodpovídající `{'"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr ""
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr ""
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr ""
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "Neodpovídající `{'"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr ""
diff --git a/po/da.po b/po/da.po
new file mode 100644
index 0000000..c137158
--- /dev/null
+++ b/po/da.po
@@ -0,0 +1,430 @@
+# Danish messages for sed
+# Copyright (C) 2001 Free Software Foundation, Inc.
+# Byrial Ole Jensen <byrial@image.dk>, 2001-2003.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-10-25 08:00+0200\n"
+"Last-Translator: Byrial Ole Jensen <byrial@image.dk>\n"
+"Language-Team: Danish <dansk@klid.dk>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n != 1;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Flere '!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Uventet ','"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Kan ikke bruge +N eller ~N som første adresse"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Uparret '{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Uventet '}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Ekstra tegn efter kommando"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Forventede \\ efter 'a', 'c' eller 'i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "'}' vil ikke have adresser"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": vil ikke have adresser"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Kommentarer vil ikke have adresser"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Der mangler en kommando"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Kommandoen bruger kun én adresse"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Uafsluttet regulært udtryk for adresse"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Uafsluttet 's'-kommando"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Uafsluttet 'y'-kommando"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Ukendt tilvalg til 's'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "Flere 'p'-flag til 's'-kommando"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "Flere 'g'-flag til 's'-kommando"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "Flere tal-flag til 's'-kommando"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "s-kommandoens tal-flag må ikke være nul"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "y-kommandoens strenge har forskellige længder"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "forventede en nyere version af sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Forkert brug af adresse-ændrer"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Ukendt kommando:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fil %s, linje %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e udtryk nr. %lu, tegn %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Kan ikke finde etiket for hop til '%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: kan ikke læse %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "kunne ikke åbne filen %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "kunne ikke åbne midlertidig fil %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "fejl i underproces"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "tilvalg 'e' er ikke understøttet"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "'e'-kommando er ikke understøttet"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Intet forudgående regulært udtryk"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Der kan ikke angives ændrere til tomt regulært udtryk"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Ugyldig reference \\%d på 's'-kommandos højreside"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" brug Perl 5's syntaks for regulære udtryk i skriptet\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" brug Perl 5's syntaks for regulære udtryk i skriptet\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Send fejlrapporter på engelsk pr. e-post til %s.\n"
+"Sørg venligst for at skrive ordet \"%s\" et sted i \"Subject:\"-feltet.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed version %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"baseret på GNU sed version 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed version %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Dette er frit programmel; se kildeteksten for kopieringsbetingelser.\n"
+"Der er INGEN garanti; end ikke for SALGBARHED eller EGNETHED TIL NOGET\n"
+"BESTEMT FORMÅL, i det omfang som loven tillader.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: kan ikke læse %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "kunne ikke åbne filen %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "kunne ikke skrive %d element til %s: %s"
+msgstr[1] "kunne ikke skrive %d elementer til %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "læsefejl på %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: kan ikke læse %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Godt resultat"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Intet resultat"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Ugyldigt regulært udtryk"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Ugyldigt sorteringstegn"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Ugyldigt navn på tegnklasse"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Afsluttende omvendt skråstreg"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Ugyldig reference bagud"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Uparret [ eller [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Uparret ( eller \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Uparret \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ugyldigt indhold af \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Ugyldig slutning på område"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Hukommelsen opbrugt"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Ugyldigt forudgående regulært udtryk"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Ufuldstændigt regulært udtryk"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regulært udtryk for stort"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Uparret ) eller \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Intet forudgående regulært udtryk"
diff --git a/po/de.po b/po/de.po
new file mode 100644
index 0000000..2e6d6f8
--- /dev/null
+++ b/po/de.po
@@ -0,0 +1,435 @@
+# sed german translation
+# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+# Walter Koch <koch@u32.de>, 2001, 2002, 2003
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.6\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-03-21 22:03:41+0100\n"
+"Last-Translator: Walter Koch <koch@u32.de>\n"
+"Language-Team: German <de@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Mehrfache `!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Unerwartetes `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "+N oder ~N können nicht als erste Adresse benutzt werden"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Nicht paarweises `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Unerwartetes `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Zusätzliche Zeichen nach dem Befehl"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Nach `a', `c' oder `i' wird \\ erwartet"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' erwartet keine Adressen"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr "`:' erwartet keine Adressen"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Kommentare erlauben keine Adressen"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Fehlender Befehl"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Befehl verwendet nur eine Adresse"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Nicht beendeter regulärer Adressausdruck"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Nicht beendeter `s'-Befehl"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Nicht beendeter `y'-Befehl"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Unbekannte Option betreffs `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "Mehrere 'p'-Optionen am `s'-Befehl"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "Mehrere 'g'-Optionen am `s'-Befehl"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "Mehrere numerische Optionen am `s'-Befehl"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "Numerische Option am `s'-Befehl darf nicht Null sein"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "Unterschiedliche Länge der Zeichenketten am `y'-Befehl"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "Neuere Version von sed erwartet"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Befehl verwendet nur eine Adresse"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Unbekannter Befehl:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: Datei %s Zeile %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e Ausdruck #%lu, Zeichen %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Kann das Ziel für den Sprung nach `%s' nicht finden"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: kann %s nicht lesen: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Datei %s kann nicht geöffnet werden: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Datei %s kann nicht geöffnet werden: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "Fehler im Subprozess"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "Option `e' wird nicht unterstützt"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "`e'-Kommando wird nicht unterstützt"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Kein vorheriger regulärer Ausdruck"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Auf leere reguläre Ausdrücke können keine `modifier' angewandt werden"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Ungültiger Verweis \\%d in den Haltepuffer des `s'-Befehls"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" Verwende die Perl 5 - Syntax für reg.Ausdrücke im Script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" Verwende die Perl 5 - Syntax für reg.Ausdrücke im Script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Fehlerberichte (in Englisch!) per E-Mail an: %s .\n"
+"Verwenden Sie dabei den Begriff ``%s'' irgendwo in der ``Betreff:''-Zeile.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "Super-sed Version %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"basiert auf GNU sed Version 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed Version %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"(Der folgende Text ist eine nicht überprüfte Übersetzung, die zur\n"
+" Information dient; in rechtlichen Fragen ist immer das englische\n"
+" Original ausschlaggebend)\n"
+"\n"
+"Dieses Programm ist freie Software; in den Quelldateien können Sie die\n"
+"Bedingungen für die Weitergabe nachlesen.\n"
+"Es gibt KEINE GARANTIE; nicht einmal die implizite Garantie der\n"
+"MARKTFÄHIGKEIT oder der ERFÜLLUNG EINES BESTIMMTEN ZWECKES.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: kann %s nicht lesen: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Datei %s kann nicht geöffnet werden: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "Kann %d Feld nicht auf %s schreiben: %s"
+msgstr[1] "Kann %d Felder nicht auf %s schreiben: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "Lesefehler in %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: kann %s nicht lesen: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Erfolgreich"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Keine Übereinstimmung"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Ungültiger regulärer Ausdruck"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Ungültiges Vergleichszeichen"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Ungültiger Zeichenklassenname"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Abschließender Backslash"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Ungültiger Rückwärtsverweis"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Nicht paarweises [ bzw. [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Nicht paarweises ( bzw. \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Nicht paarweises \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ungültiger Inhalt in \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Ungültiges Bereichsende"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Speicher erschöpft"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Vorheriger regulärer Ausdruck ist ungültig"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Regulärer Ausdruck endet zu früh"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regulärer Ausdruck ist zu groß"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Nicht paarweises ) bzw. \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Kein vorheriger regulärer Ausdruck"
diff --git a/po/el.po b/po/el.po
new file mode 100644
index 0000000..08d1050
--- /dev/null
+++ b/po/el.po
@@ -0,0 +1,435 @@
+# Greek messages for GNU sed.
+# Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Simos Xenitellis <simos@hellug.gr>, 1998, 1999, 2000, 2001, 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU sed 3.02.80\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-03-08 12:57+0000\n"
+"Last-Translator: Simos Xenitellis <simos@hellug.gr>\n"
+"Language-Team: Greek <nls@tux.hellug.gr>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-7\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "ÐïëëáðëÜ `!'s"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "ÁíáðÜíôå÷ï `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Äåí åßíáé äõíáôÞ ç ÷ñÞóç ôùí +N Þ ~N ãéá ðñþôç äéåýèõíóç"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Áôáßñéáóôï `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Áôáßñéáóôï `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "ÅðéðëÝïí ÷áñáêôÞñåò ìåôÜ ôçí åíôïëÞ"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Áðáéôåßôáé \\ ìåôÜ ôéò åíôïëÝò `a', `c' Þ `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "ôï `}' äåí ÷ñåéÜæåôáé äéåõèýíóåéò"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": äåí ÷ñåéÜæåôáé äéåõèýíóåéò"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Ôá ó÷üëéá äåí äÝ÷ïíôáé äéåõèýíóåéò"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Ëåßðåé ç åíôïëÞ"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Ç åíôïëÞ ÷ñçóéìïðïéåß ìüíï ìéá äéåýèõíóç"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Ìç ôåñìáôéóìÝíç äéåýèõíóç êáíïíéêÞò Ýêöñáóçò"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Ìç ôåñìáôéóìÝíç åíôïëÞ `s'"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Ìç ôåñìáôéóìÝíç åíôïëÞ `y'"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "¶ãíùóôç åðéëïãÞ ãéá ôï `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "ðïëëáðëÝò åðéëïãÝò `p' óôçí åíôïëÞ `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "ðïëëáðëÝò åðéëïãÝò `g' óôçí åíôïëÞ `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "ðïëëáðëüò åðéëïãÝò áñéèìïý óôçí åíôïëÞ `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "ç åðéëïãÞ áñéèìïý óôçí åíôïëÞ `s' äåí ìðïñåß íá åßíáé ìçäÝí"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "ôá áëöáñéèìçôéêÜ ãéá ôçí åíôïëÞ `y' åßíáé äéáöïñåôéêïý ìåãÝèïõò"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "ÁêáôÜëëçëç ÷ñÞóç ôïõ äéáìïñöùôÞ äéåýèõíóçò"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "¶ãíùóôç åíôïëÞ:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: áñ÷åßï %s ãñáììÞ %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e Ýêöñáóç #%lu, ÷áñáêôÞñáò %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Äåí Þôáí äõíáôÞ ç åýñåóç åôéêÝôôáò ãéá ìåôÜâáóç óôï `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: áäõíáìßá óôçí áíÜãíùóç %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Äåí ìðüñåóá íá áíïßîù ôï áñ÷åßï %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Äåí ìðüñåóá íá áíïßîù ôï áñ÷åßï %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "óöÜëìá óôç õðïäéáäéêáóßá"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "äåí õðïóôçñßæåôáé ç åðéëïãÞ `e'"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "äåí õðïóôçñßæåôáé ç åíôïëÞ `e'"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "äåí åðéôñÝðåôáé ï ïñéóìüò äéáìïñöùôþí óå êåíÞ êáíïíéêÞ Ýêöñáóç"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Ìç Ýãêõñç áíáöïñÜ \\%d óôï äåîß ôìÞìá ôçò åíôïëÞò `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"ÁíáöïñÝò óöáëìÜôùí ìÝóù çëåêôñïíéêïý ôá÷õäñïìåßïõ óôï: %s .\n"
+"ÐñïóÝîôå íá óõìðåñéëÜâåôå ôç ëÝîç ``%s'' êÜðïõ óôï ðåäßï ``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Áõôü ôï ðñüãñáììá åßíáé åëåýèåñï ëïãéóìéêü· äåßôå ôïí ðçãáßï êþäéêá ãéá "
+"ôïõò\n"
+"êáíïíéóìïýò áíôéãñáöÞò. Äåí õðÜñ÷åé ÊÁÌÉÁ ÅÃÃÕÇÓÇ· ïýôå áêüìá ãéá\n"
+"ËÅÉÔÏÕÑÃÉÊÏÔÇÔÁ Þ ÊÁÔÁËËÇËÏÔÇÔÁ ÃÉÁ ÅÍÁ ÓÕÃÊÅÊÑÉÌÅÍÏ ÓÊÏÐÏ, ìÝ÷ñé ôï óçìåßï\n"
+"ðïõ åðéôñÝðåé ï íüìïò.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: áäõíáìßá óôçí áíÜãíùóç %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Äåí ìðüñåóá íá áíïßîù ôï áñ÷åßï %s"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "áäýíáôç ç åããñáöÞ %d óôïé÷åßùí óôï %s: %s"
+msgstr[1] "áäýíáôç ç åããñáöÞ %d óôïé÷åßùí óôï %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "óöÜëìá áíÜãíùóçò óôï %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: áäõíáìßá óôçí áíÜãíùóç %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+#, fuzzy
+msgid "Invalid regular expression"
+msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç"
+
+#: lib/regcomp.c:159
+#, fuzzy
+msgid "Invalid collation character"
+msgstr "ìç Ýãêõñï áêïëïõèßá äéáöõãÞò óå êëÜóç ÷áñáêôÞñùí"
+
+#: lib/regcomp.c:162
+#, fuzzy
+msgid "Invalid character class name"
+msgstr "ìç Ýãêõñï áêïëïõèßá äéáöõãÞò óå êëÜóç ÷áñáêôÞñùí"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "Áôáßñéáóôï `{'"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "Áôáßñéáóôï `{'"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "Áôáßñéáóôï `{'"
+
+#: lib/regcomp.c:180
+#, fuzzy
+msgid "Invalid content of \\{\\}"
+msgstr "ìç Ýãêõñï ðåñéå÷üìåíï äåéêôþí åðáíÜëçøçò {}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+#, fuzzy
+msgid "Invalid preceding regular expression"
+msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç"
+
+#: lib/regcomp.c:192
+#, fuzzy
+msgid "Premature end of regular expression"
+msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç"
+
+#: lib/regcomp.c:195
+#, fuzzy
+msgid "Regular expression too big"
+msgstr "ðïëý ìåãÜëç êáíïíéêÞ Ýêöñáóç"
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "Áôáßñéáóôï `{'"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Äåí âñÝèçêå ðñïçãïýìåíç êáíïíéêÞ Ýêöñáóç"
diff --git a/po/eo.po b/po/eo.po
new file mode 100644
index 0000000..1ab5220
--- /dev/null
+++ b/po/eo.po
@@ -0,0 +1,430 @@
+# Esperantaj mesaĝoj por GNU sed.
+# Copyright (C) 2002 Free Software Foundation, Inc.
+# Edmund GRIMLEY EVANS <edmundo@rano.org>, 2001-2003.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-10-26 20:57+0000\n"
+"Last-Translator: Edmund GRIMLEY EVANS <edmundo@rano.org>\n"
+"Language-Team: Esperanto <translation-team-eo@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8-bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Pluraj '!'oj"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Neatendita ','"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Ne eblas uzi +N aŭ ~N kiel unuan adreson"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Neparigita '{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Neatendita '}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Kromaj signoj post komando"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Atendita \\ post 'a', 'c' aŭ 'i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "'}' ne deziras adresojn"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ne deziras adresojn"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Komentoj ne akceptas adresojn"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Mankas komando"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Komando uzas nur unu adreson"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Nefinita adresa regesp"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Nefinita komando 's'"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Nefinita komando 'y'"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Nekonata opcio por 's'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "pluraj 'p'-opcioj por komando 's'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "pluraj 'g'-opcioj por komando 's'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "pluraj nombro-opcioj por komando 's'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "nombro-opcio por komando 's' ne povas esti nul"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "ĉenoj por komando 'y' havas malsamajn longojn"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "atendis pli novan version de sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Nevalida uzo de adresmodifilo"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Nekonata komando:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: dosiero %s linio %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e esprimo #%lu, signo %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Ne povas trovi etikedon por salto al '%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ne povas legi %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Ne povis malfermi dosieron %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Ne povis malfermi dumtempan dosieron %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "eraro en subprocezo"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opcio 'e' ne realigita"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "komando 'e' ne realigita"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Mankas antaŭa regula esprimo"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Ne eblas specifi modifilojn ĉe malplena regula esprimo"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Nevalida referenco \\%d ĉe dekstra flanko de komando 's'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" uzi sintakso de Perl 5 por regulaj esprimoj en programo.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" uzi sintakso de Perl 5 por regulaj esprimoj en programo.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Retpoŝtu cimo-raportojn al: %s .\n"
+"Nepre menciu la vorton '%s' ie en la temlinio.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versio %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"bazita sur \"GNU sed\" versio 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versio %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Ĉi tio estas libera programo; vidu la fonton por kopi-kondiĉoj. Estas\n"
+"NENIA GARANTIO; eĉ ne por KOMERCA KVALITO aŭ ADEKVATECO POR DIFINITA CELO,\n"
+"laŭ la grado permesita de juro.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: ne povas legi %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Ne povis malfermi dosieron %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "ne povis skribi %d eron al %s: %s"
+msgstr[1] "ne povis skribi %d erojn al %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "legeraro ĉe %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: ne povas legi %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sukceso"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Maltrafo"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Nevalida regula esprimo"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Nevalida kunfanda signo"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nevalida nomo de signoklaso"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Malsuprenstreko ĉe fino"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Nevalida retroreferenco"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Neparigita [ aŭ [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Neparigita ( aŭ \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Neparigita \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Nevalida enhavo de \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Nevalida fino de gamo"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Mankas memoro"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Nevalida antaŭa regula esprimo"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Neatendita fino de regula esprimo"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regula esprimo tro granda"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Neparigita ) aŭ \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Mankas antaŭa regula esprimo"
diff --git a/po/es.po b/po/es.po
new file mode 100644
index 0000000..250ad65
--- /dev/null
+++ b/po/es.po
@@ -0,0 +1,433 @@
+# Mensajes en español para GNU sed.
+# Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+# Cristian Othón Martínez Vera <cfuga@itam.mx>, 2001, 2002, 2003.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-10-24 12:38-0500\n"
+"Last-Translator: Cristian Othón Martínez Vera <cfuga@itam.mx>\n"
+"Language-Team: Spanish <es@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8-bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "'!'s múltiples"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "`,' inesperada"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "No se pueden usar +N o ~N como primera dirección"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' sin pareja"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "`}' inesperado"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Caracteres extra después de la orden"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Se esperaba \\ después de `a', `c' ó `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' no acepta ninguna dirección"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": no acepta ninguna dirección"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Los comentarios no aceptan ninguna dirección"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Orden faltante"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "La orden solamente usa una dirección"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Dirección de expresión regular sin terminar"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Orden `s' sin terminar"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Orden `y' sin terminar"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Opción desconocida para `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "múltiples opciones `p' para la orden `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "múltiples opciones `g' para la orden `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "múltiples opciones numéricas para la orden `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "una opción numérica para la orden `s' no puede ser cero"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "las cadenas para la orden `y' son de longitudes diferentes"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "se esperaba una versión más reciente de sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Uso inválido de un modificador de dirección"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Orden desconocida:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fichero %s línea %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expresión #%lu, carácter %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "No se puede encontrar la etiqueta para saltar a `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: no se puede leer %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "No se puede abrir el fichero %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "No se puede abrir el fichero temporal %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "error en el subproceso"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "no hay soporte para la opción `e'"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "no hay soporte para el comando `e'"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "No hay una expresión regular previa"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "No se pueden especificar modificadores en expresiones regulares vacías"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Referencia \\%d inválida en el lado derecho del comando `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" utilizar la sintaxis de expresiones regulares de Perl 5\n"
+" en el guión.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" utilizar la sintaxis de expresiones regulares de Perl 5\n"
+" en el guión.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Envíe reportes de bichos por e-mail a: %s .\n"
+"Asegúrese de incluir la palabra ``%s'' en algún lugar en el campo "
+"``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versión %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"basado en GNU sed versión 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versión %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Esto es software libre; vea el código fuente para las condiciones de copia.\n"
+"No hay NINGUNA garantía; ni siquiera de COMERCIABILIDAD o IDONEIDAD PARA UN\n"
+"FIN DETERMINADO, en la extensión permitida por ley.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: no se puede leer %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "No se puede abrir el fichero %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "no se puede escribir %d elemento a %s: %s"
+msgstr[1] "no se pueden escribir %d elementos a %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "error al leer de %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: no se puede leer %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Éxito"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "No hay coincidencia"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expresión regular inválida"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Carácter de ordenamiento inválido"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nombre de clase de caracteres inválido"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Diagonal invertida al final"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Referencia hacia atrás inválida"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ ó [^ sin pareja"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( ó \\( sin pareja"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ sin pareja"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Contenido inválido de \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Final de rango inválido"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memoria agotada"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Expresión regular precedente inválida"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Final prematuro de la expresión regular"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Expresión regular demasiado grande"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") ó \\) sin pareja"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "No hay una expresión regular previa"
diff --git a/po/et.po b/po/et.po
new file mode 100644
index 0000000..0e11cd2
--- /dev/null
+++ b/po/et.po
@@ -0,0 +1,436 @@
+# Estonian translations for GNU sed.
+# Copyright (C) 2001 Free Software Foundation, Inc.
+# Toomas Soome <Toomas.Soome@microlink.ee>, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-09 16:33+0300\n"
+"Last-Translator: Toomas Soome <Toomas.Soome@microlink.ee>\n"
+"Language-Team: Estonian <et@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-15\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "korduv `!'"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "ootamatu `,'"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "+N või ~N ei või kasutada esimese aadressina"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "liigne `{'"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "ootamatu `}'"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "lisasümbolid peale käsku"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "peale `a', `c' või `i' peab olema \\"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' ei vaja aadresse"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ei vaja aadresse"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "kommentaarid ei vaja aadresse"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "käsk puudub"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "käsk kasutab vaid üht aadressi"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "lõpetamata aadressi avaldis"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "lõpetamata `s' käsk"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "lõpetamata `y' käsk"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "tundmatu võti `s' käsule"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "korduv `p' võti `s' käsus"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "korduv `g' võti `s' käsus"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "korduvad numbrivõtmed `s' käsus"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "numbrivõti `s' käsus ei või olla null"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "sõned käsus `y' on erineva pikkusega"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "eraldav sümbol ei ole ühe-baidiline sümbol"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "oodati sedi uuemat versiooni"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "vigane rea aadressi 0 kasutamine"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "tundmatu käsk: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fail %s rida %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e avaldis #%lu, sümbol %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "ei leia märgendit, et hüpata kohale `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ei saa lugeda %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "%s ei saa toimetada: see on terminal"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "%s ei saa toimetada: see ei ole tavaline fail"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "ajutist faili %s ei saa avada: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "viga alamprotsessis"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "võtit `e' ei toetata"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "käsku `e' ei toetata"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "eelmist regulaaravaldist pole"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "muudatusi tühjale regulaaravaldisele ei saa määrata"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "vigane viide \\%d käsu `s' paremas pooles"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" kasuta skriptis Perl 5 regulaaravaldiste süntaksit.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Kasutamine: %s [võti]... {ainult-skript-kui-teisi-skripte-pole} [sisend-"
+"fail]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" keela mustriruumi automaatne väljastamine\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e skript, --expression=skript\n"
+" lisa täidetavate käskluste skript\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f skripti-fail, --file=skripti-fail\n"
+" lisa skripti-faili sisu täidetavate käskluste hulka\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[SUFIKS], --in-place[=SUFIKS]\n"
+" toimeta faile (kui kasutati sufiksit, loob ka varukoopia)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" määra `l' käsule soovitatav rea pikkus\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" blokeeri kõik GNU laiendused.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" kasuta skriptis laiendatud regulaaravaldiste süntaksit.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" käsitle faile ükshaaval, mitte ühe jätkuva voona.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" loe sisendfailist minimaalne kogus andmeid ja tühjenda\n"
+" väljundpuhvreid sagedamini\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help väljasta see abiinfo ja lõpeta töö\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version väljasta versiooniinfo ja lõpeta töö\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Kui võtmeid -e, --expression, -f või --file ei kasutata, loetakse\n"
+"esimene argument, mis pole võti, sed skriptiks. Kõik järgnevad argumendid "
+"on\n"
+"sisendfailide nimed; kui sisendfaile ei antud, loetakse standardsisendit.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Postitage teated vigadest: %s .\n"
+"Lisage kindlasti sõna ``%s'' ``Subject:'' reale.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versioon %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"põhineb GNU sed versioonil %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versioon %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"See on vaba tarkvara; kopeerimistingimused leiate lähtetekstidest. Garantii\n"
+"PUUDUB; ka müügiks või mingil eesmärgil kasutamiseks, vastavalt seadustega\n"
+"lubatud piiridele.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s ei saa eemaldada: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "faili %s ei saa avada: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%d elemendi faili %s kirjutamine ebaõnnestus: %s"
+msgstr[1] "%d elemendi faili %s kirjutamine ebaõnnestus: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "lugemisviga %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s ei saa ümber nimetada: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Edukas"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Ei leia"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Vigane regulaaravaldis"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Vigane sortimise sümbol"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Vigane sümbolite klassi nimi"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Lõpetav langkriips"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Vigane tagasi viide"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Puudub [ või [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Puudub ( või \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Puudub \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Vigane \\{\\} sisu"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Vigane vahemiku lõpp"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Mälu on otsas"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Vigane eelnev regulaaravaldis"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Ootamatu regulaaravaldise lõpp"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regulaaravaldis on liiga suur"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Puudub ) või \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Eelmist regulaaravaldist pole"
diff --git a/po/fi.po b/po/fi.po
new file mode 100644
index 0000000..d63f3d3
--- /dev/null
+++ b/po/fi.po
@@ -0,0 +1,433 @@
+# Finnish translations for GNU sed.
+# Copyright © 2002 Free Software Foundation, Inc.
+# Sami J. Laine <sami.laine@iki.fi>, 2002
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed-4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-12-07 09:35+0200\n"
+"Last-Translator: Sami J. Laine <sami.laine@iki.fi>\n"
+"Language-Team: Finnish <translation-team-fi@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-15\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n != 1;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Useita \"!\"-merkkejä"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Odottamaton \",\""
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Ensimmäisenä osoitteena ei voi olla +N tai ~N"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Pariton \"{\""
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Pariton \"}\""
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Ylimääräisiä merkkejä komennon jälkeen"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "\\ odotettiin merkkien `a', `c' tai `i' jälkeen"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "\"}\" ei tarvitse osoitteita"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ei tarvitse osoitteita"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Kommentit eivät hyväksy osoitteita"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Puuttuva komento"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Komento käyttää vain yhtä osoitetta"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Päättymätön osoite vakiolauseessa"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Päättymätön \"s\"-komento"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Päättymätön \"y\"-komento"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Tuntematon valitsin \"s\":lle"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "useita \"p\"-valitsimia \"s\"-komennolle"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "useita \"g\"-valitsimia \"s\"-komennolle"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "useita numeerisia valitsimia \"s\"-komennolle"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "numeerinen valitsin \"s\"-komennolle ei voi olla nolla"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "merkkijonot \"y\"-komennolle ovat pituudeltaan vaihtelevia"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "odotettiin uudempaa versiota sed:stä"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Komento käyttää vain yhtä osoitetta"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Tuntematon komento:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: tiedosto %s rivi %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e lauseke #%lu, merkki %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Nimikettä hypylle kohteeseen \"%s\" ei löydy"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ei voida lukea syötettä %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Tiedostoa %s ei voitu avata: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Väliaikaistiedostoa %s ei voitu avata: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "virhe lapsiprosessissa"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "valitsin `e' ei ole tuettu"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "komento `e' ei ole tuettu"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Ei aikaisempaa säännöllistä lausetta"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Muuttajia ei voida määritellä tyhjään säännölliseen lausekkeeseen"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Virheellinen viittaus \\%d komennon `s' oikealla puolella"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" käytä Perl 5:en mukaista säännöllisten lauseiden\n"
+" syntaksia skriptissä.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" käytä Perl 5:en mukaista säännöllisten lauseiden\n"
+" syntaksia skriptissä.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Lähetä virheraportit osoitteeseen %s .\n"
+"Sisällytä sana \"%s\" viestin aihekenttään (\"Subject\"-kenttään).\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versio %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"perustuu GNU sed versioon %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versio %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Tämä ohjelma on vapaa ohjelmisto; tarkista jakeluehdot lähdekoodista.\n"
+"Tälle ohjelmalle ei anneta minkäänlaista takuuta; ei edes takuuta\n"
+"kaupallisesti hyväksyttävästä laadusta tai soveltuvuudesta tiettyyn\n"
+"tarkoitukseen.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "Tiedostoa %s ei voitu poistaa: %s"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Tiedostoa %s ei voitu avata: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%d kohdetta ei voitu kirjoittaa tulosteeseen %s: %s"
+msgstr[1] "%d kohdetta ei voitu kirjoittaa tulosteeseen %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "lukuvirhe syötteessä %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "Tiedostoa %s ei voitu nimetä uudelleen: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Onnistui"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Ei osumaa"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Virheellinen säännöllinen lauseke"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Virheellinen vertailumerkki"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Virheellinen merkkiluokan nimi"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Kenoviiva lopussa"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Virheellinen takaisinviittaus"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Pariton \"[\" tai \"[^\""
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Pariton \"(\" tai \"\\(\""
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Pariton \"\\{\""
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Virheellinen sisältö \\{\\}:ssä"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Virheellinen välin loppu"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Muisti loppu"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Virheellinen edeltävä säännöllinen lauseke"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Ennenaikainen säännöllisen lausekkeen loppu"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Säännöllinen lauseke on liian suuri"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Pariton \")\" tai \"\\)\""
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Ei aikaisempaa säännöllistä lausetta"
diff --git a/po/fr.po b/po/fr.po
new file mode 100644
index 0000000..8d53c8c
--- /dev/null
+++ b/po/fr.po
@@ -0,0 +1,453 @@
+# French translation of GNU sed.
+# Copyright (C) 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+# Gaël Quéri <gael@lautre.net>, 1998.
+#
+# J'ai préféré utiliser le terme <<Expression régulière>> plutôt
+# qu'<<expression rationnelle>> car celui-là est moins déroutant
+# pour ceux qui sont habitués à la formulation anglaise
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-12 00:09+0200\n"
+"Last-Translator: Gaël Quéri <gael@lautre.net>\n"
+"Language-Team: French <traduc@traduc.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "`!' multiples"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "`,' inattendue"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "utilisation invalide de +N ou ~N comme première adresse"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "`{' non refermée"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "`}' inattendu"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "caractères inutiles après la commande"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "\\ attendu après `a', `c' ou `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' n'a besoin d'aucune adresse"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": n'a besoin d'aucune adresse"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "les commentaires n'acceptent aucune adresse"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "commande manquante"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "la commande n'utilise qu'une adresse"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "expression régulière d'adresse inachevée"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "commande `s' inachevée"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "commande `y' inachevée"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "option inconnue pour `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "plusieurs options `p' à la commande `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "plusieurs options `g' à la commande `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "plusieurs options numériques à la commande `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "l'option numérique de la commande `s' ne peut être nulle"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "les chaînes destinées à la commande `y' ont des longueurs différentes"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "le caractère délimiteur n'est pas un caractère à un seul octet"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "une version plus récente de sed est attendue"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "utilisation invalide de l'adresse de ligne 0"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "commande inconnue: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fichier %s ligne %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expression n°%lu, caractère %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "impossible de trouver l'étiquette pour sauter à `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: impossible de lire %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "impossible d'éditer %s: est un terminal"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "impossible d'éditer %s: ce n'est pas un fichier régulier"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "impossible d'ouvrir le fichier temporaire %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "erreur dans le sous-processus"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "l'option `e' n'est pas supportée"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "la commande `e' n'est pas supportée"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "pas d'expression régulière précédente"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+"impossible de spécifier des modifieurs sur une expression\n"
+"régulière vide"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "référence \\%d invalide dans le côté droit de la commande `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" utiliser la syntaxe des expressions régulières\n"
+" de Perl 5 dans le script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Utilisation: %s [OPTION]... {script-seulement-si-pas-d'autre-script}\n"
+"[fichier-d'entrée]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" supprimer l'écriture automatique de l'espace des motifs\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e script, --expression=script\n"
+" ajouter le script aux commandes à être exécutées\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f fichier-script, --file=fichier-script\n"
+" ajouter le contenu de fichier-script aux commandes\n"
+" à être exécutées\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[SUFFIXE], --in-place[=SUFFIXE]\n"
+" éditer les fichiers à leur place (fait une\n"
+" sauvegarde si l'extension est fournie)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" spécifier la longueur de coupure de ligne désirée pour la\n"
+" commande `l'\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" désactiver toutes les extensions GNU.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" utiliser la syntaxe des expressions régulières\n"
+" étendues dans le script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" considérer les fichiers comme séparés plutôt que comme un\n"
+" simple flux long et continu.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" charger des quantités minimales de données depuis les\n"
+" fichiers d'entrée et libérer les tampons de sortie plus\n"
+" souvent\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help afficher cette aide et sortir\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+" --version afficher les informations de version du logiciel et sortir\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Si aucune option -e, --expression, -f ou --file n'est donnée, le\n"
+"premier argument qui n'est pas une option sera pris comme étant le script\n"
+"sed à interpréter. Tous les arguments restants sont les noms des fichiers\n"
+"d'entrée; si aucun fichier d'entrée n'est spécifié, l'entrée standard\n"
+"est lue.\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Rapporter toutes anomalies à: %s.\n"
+"N'oubliez pas d'inclure le mot ``%s'' quelque-part dans la zone "
+"``Subject:''\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed version %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"fondé sur GNU sed version %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed version %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Ce logiciel est libre; voir les sources pour les conditions de "
+"reproduction.\n"
+"AUCUNE garantie n'est donnée; y compris pour des RAISONS COMMERCIALES ou\n"
+"pour RÉPONDRE A UN BESOIN PARTICULIER, à l'étendue permise par la loi.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "impossible de supprimer %s: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "impossible d'ouvrir le fichier %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "impossible d'écrire %d item à %s: %s"
+msgstr[1] "impossible d'écrire %d items à %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "erreur de lecture sur %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "impossible de renommer %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Succès"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Pas de concordance"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expression régulière invalide"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Caractère de collation invalide"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nom de classe de caractères invalide"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Antislash final"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Référence arrière invalide"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ ou [^ non refermé"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( ou \\( non refermé"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ non refermé"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Contenu de \\{\\} invalide"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Fin d'intervalle invalide"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Mémoire épuisée"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "L'expression régulière précédente est invalide"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Fin prématurée d'une expression régulière"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Expression régulière trop grande"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") ou \\) non refermé"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Pas d'expression régulière précédente"
diff --git a/po/ga.po b/po/ga.po
new file mode 100644
index 0000000..9b452bb
--- /dev/null
+++ b/po/ga.po
@@ -0,0 +1,446 @@
+# Irish translations for sed
+# Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+# Kevin Patrick Scannell <scannell@SLU.EDU>, 2003, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-07 11:04-0500\n"
+"Last-Translator: Kevin Patrick Scannell <scannell@SLU.EDU>\n"
+"Language-Team: Irish <ga@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=4; plural=n==1 ? 0 : (n>1 && n<7) ? 1 : (n>6 && n "
+"<11) ? 2 : 3;\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "`!'-anna iomadúla"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "`,' gan choinne"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "ní féidir +N nó ~N a úsáid mar an chéad seoladh"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "`{' corr"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "`}' gan choinne"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "carachtair breise i ndiaidh an t-ordú"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "bhíothas ag súil le \\ i ndiaidh `a', `c', nó `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "Níl fáilte roimh seoltaí le `}'"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr "Níl fáilte roimh seoltaí le `:'"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "níl fáilte roimh seoltaí le nótaí tráchta"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "ordú ar iarraidh"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "ní úsáidtear an t-ordú ach seoladh amháin"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "seoladh regex gan chríochnú"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "ordú `s' gan chríochnú"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "ordú `y' gan chríochnú"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "rogha anaithnid i ndiaidh `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "an iomarca roghanna `p' i ndiaidh `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "an iomarca roghanna `g' i ndiaidh `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "an iomarca roghanna uimhriúla i ndiaidh `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "ní cheadaítear nialas mar rogha uimhriúil leis an ordú `s'"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "ní aon fad amháin ar na teaghráin leis an ordú `y'"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "tá an teorantóir ina charachtar ilbheart"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "bhíothas ag súil le leagan `sed' níos úire"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "ní féidir an seoladh líne 0 a úsáid"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "ordú anaithnid: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: comhad %s líne %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e slonn #%lu, char %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "níl aon fháil ar an lipéad `%s' don léim"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ní féidir %s a léamh: %s\n"
+
+# Irish is nice this way, no initial mutation on 'rud'! -- KPS
+# Include all three b/c I'm using template version of "Plural-Forms"
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "níorbh fhéidir %s a chur in eagar; is teirminéal é"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "níorbh fhéidir %s a chur in eagar: ní gnáthcomhad é"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "níorbh fhéidir an comhad sealadach %s a oscailt: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "earráid i bhfo-phróiseas"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "níl an rogha `e' ar fáil"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "níl an t-ordú `e' ar fáil"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "níl aon slonn ionadaíochta roimh seo"
+
+# bunathraitheoir is in FARF - KPS
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "ní féidir bunathraitheoirí a shonrú le slonn bán"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "tagairt neamhbhailí \\%d ar dheis ordú `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" bain úsáid as sloinn ionadaíochta atá ag Perl 5.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Úsáid: %s [ROGHA]... {script-mura-bhfuil-script-eile} [inchomhad]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" stop priontáil uathoibríoch den spás patrúin\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e script, --expression=script\n"
+" cuir an script leis na horduithe le rith\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f comhad-script, --file=comhad-script\n"
+" cuir na línte i `comhad-script' leis na horduithe le rith\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[IARMHÍR], --in-place[=IARMHÍR]\n"
+" cuir eagar ar comhaid san áit a bhfuil siad (agus déan\n"
+"                 cúltaca má tá IARMHÍR tugtha)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" ceap an fad timfhillte le haghaidh an ordaithe `l'\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" díchumasaigh gach feabhsúchán GNU.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" úsáid sloinn ionadaíochta feabhsaithe sa script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" féach ar comhaid ina leith seachas mar sruth leanúnach.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" lódáil cantaí beaga ó na comhaid ionchur agus sruthlaigh\n"
+" na maoláin aschur níos minice\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help taispeáin an chabhair seo agus éirigh as\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version taispeáin eolas faoin leagan agus éirigh as\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Mura bhfuil rogha -e, --expression, -f, nó --file ann, glacfar an chéad\n"
+"argóint nach raibh ina rogha mar an script `sed' a léirmhíniú. Tá gach\n"
+"argóint eile an t-ainm de comhad ionchuir; mura bhfuil comhad ann\n"
+"léigh ón ionchur caighdeánach.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Seol tuairiscí fabhtanna chuig: %s .\n"
+"Cuir an focal ``%s'' áit éigin sa líne ``Subject:'' le do thoil.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed, leagan %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"bunaithe ar GNU sed, leagan %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed, leagan %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Is saorbhogearra an ríomhchlár seo; féach ar an bhunchód le haghaidh\n"
+"coinníollacha cóipeála. Níl baránta AR BITH ann; go fiú níl baránta ann\n"
+"d'INDÍOLTACHT nó FEILIÚNACHT DO FHEIDHM AR LEITH, an oiread atá ceadaithe\n"
+"de réir dlí.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "ní féidir %s a scriosadh: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "níorbh fhéidir an comhad %s a oscailt: %s"
+
+# Irish is nice this way, no initial mutation on 'rud'! -- KPS
+# Include all three b/c I'm using template version of "Plural-Forms"
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "níorbh fhéidir %d rud a scríobh i %s: %s"
+msgstr[1] "níorbh fhéidir %d rud a scríobh i %s: %s"
+msgstr[2] "níorbh fhéidir %d rud a scríobh i %s: %s"
+msgstr[3] "níorbh fhéidir %d rud a scríobh i %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "earráid ag léamh %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "ní féidir %s a athainmniú: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Bua!"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Níl a leithéid ann"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Slonn ionadaíochta neamhbhailí"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Carachtar cóimheasa neamhbhailí"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Aicme charachtair neamhbhailí"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Cúlslais ag deireadh"
+
+# coinage - KPS
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Cúltagairt neamhbhailí"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ nó [^ corr"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( nó \\( corr"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ corr"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ábhar neamhbhailí idir \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Deireadh raoin neamhbhailí"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Cuimhne ídithe"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Is neamhbhailí an slonn ionadaíochta roimh seo"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Deireadh le slonn ionadaíochta gan choinne"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Slonn ionadaíochta rómhór"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") nó \\) corr"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Níl aon slonn ionadaíochta roimh seo"
diff --git a/po/gl.po b/po/gl.po
new file mode 100644
index 0000000..8bfab43
--- /dev/null
+++ b/po/gl.po
@@ -0,0 +1,433 @@
+# Galician translation of GNU sed
+# Copyright (C) 1999, 2002 Free Software Foundation, Inc.
+# Jacobo Tarrío Barreiro <jtarrio@trasno.net>, 1999, 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-10-25 15:57+0200\n"
+"Last-Translator: Jacobo Tarrío Barreiro <jtarrio@trasno.net>\n"
+"Language-Team: Galician <gpul-traduccion@ceu.fi.udc.es>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n!=1;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Múltiples `!'s"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "`,' inesperada"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Non se pode usar +N ou ~N como primeira dirección"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' sen parella"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "`}' inesperado"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Caracteres extra despois da instrucción"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Esperábase \\ despois de `a', `c' ou `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' non acepta un enderezo"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": non acepta un enderezo"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Os comentarios non aceptan enderezos"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Falta unha instrucción"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "A instrucción só usa un enderezo"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Expresión regular de enderezo non rematada"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Instrucción `s' non rematada"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Instrucción `y' non rematada"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Opción de `s' descoñecida"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "múltiples opcións `p' para a instrucción `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "múltiples opcións `g' para a instrucción `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "múltiples opcións numéricas para a instrucción `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "unha opción numérica para a instrucción `s' non pode ser cero"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "as cadeas para a instrucción y teñen lonxitudes diferentes"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Uso non válido de modificador de dirección"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Instrucción descoñecida:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: ficheiro %s liña %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expresión #%lu, carácter %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Non se puido atopa-la etiqueta para saltar a `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: non se puido ler %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Non se puido abri-lo ficheiro %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Non se puido abri-lo ficheiro %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "erro no subproceso"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "a opción `e' non está soportada"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "o comando `e' non está soportado"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Non hai unha expresión regular anterior"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Non se poden especificar modificadores nunha expresión regular baleira"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Referencia \\%d non válida no lado dereito do comando `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usa-la sintaxe de expresións regulares de Perl 5 no "
+"script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usa-la sintaxe de expresións regulares de Perl 5 no "
+"script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Informe dos erros no programa a %s .\n"
+"Informe dos erros na traducción a gpul-traduccion@ceu.fi.udc.es .\n"
+"Asegúrese de incluí-la palabra ``%s'' nalgunha parte do campo ``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versión %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr "baseado en GNU sed versión 3.02.80\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versión %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Isto é software libre; vexa o código fonte polas condicións de copia. NON "
+"hai\n"
+"garantía; nin sequera de COMERCIABILIDADE ou APTITUDE PARA UN FIN "
+"DETERMINADO,\n"
+"ata o que permite a lei.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: non se puido ler %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Non se puido abri-lo ficheiro %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "non se puido escribir %d elemento en %s: %s"
+msgstr[1] "non se puideron escribir %d elementos en %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "erro de lectura en %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: non se puido ler %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Éxito"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Non se atopou"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expresión regular non válida"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Carácter de ordeamento non válido"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nome de clase de caracteres non válido"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Barra invertida á fin de liña"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Referencia cara a atrás non válida"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ ou [^ sen parella"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( ou \\( sen parella"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ sen parella"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Contido de \\{\\} non válido"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Fin de rango non válida"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memoria esgotada"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Expresión regular anterior non válida"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Fin prematura da expresión regular"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Expresión regular grande de máis"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") ou \\) sen parella"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Non hai unha expresión regular anterior"
diff --git a/po/he.po b/po/he.po
new file mode 100644
index 0000000..aa4771d
--- /dev/null
+++ b/po/he.po
@@ -0,0 +1,423 @@
+# Hebrew messages for GNU Sed -*- coding: hebrew-iso-8bit -*-
+# Copyright (C) 2001 Free Software Foundation, Inc.
+# Eli Zaretskii <eliz@is.elta.co.il>, 2001.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 3.02.80\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2001-08-04 20:37+0300\n"
+"Last-Translator: Eli Zaretskii <eliz@gnu.org>\n"
+"Language-Team: Hebrew <eliz@gnu.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "`!' éåáéø"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "éåôö-éúìá `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "äðåùàø úáåúëë ~N åà +N-á ùîúùäì ïúéð àì"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "âåæ-ïá åì ïéàù `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "éåôö-éúìá `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "äãå÷ôä éøçà íéøúåéî íéåú"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "úåáåúë ìá÷î åðéà `}'"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr "úåáåúë ìá÷î åðéà :"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "úåáåúë úåìá÷î ïðéà úåøòä"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "äøñç äãå÷ô"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "ãáìá úçà úáåúë úìá÷î åæ äãå÷ô"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "íåéñ àìì úáåúë ìù éøìåâø éåèéá"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "íåéñ àìì `s' úãå÷ô"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "íåéñ àìì `y' úãå÷ô"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "`s' ìù øëåî-éúìá ïééôàî"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "`s' äãå÷ôì íéáåøî `p' éðééôàî"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "`s' äãå÷ôì íéáåøî `g' éðééôàî"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "`s' äãå÷ôì íéáåøî øôñî éðééôàî"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "ñôà úåéäì ìåëé åðéà `s' äãå÷ôì éøôñî ïééôàî"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "äðåù êøåàá ïðéä `y' äãå÷ôì úåæåøçî"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "úáåúëä ïééöîá éåâù ùåîéù"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "úøëåî-éúìá äãå÷ô äðéä"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s úéðëúá (%s õáå÷ ìù %lu äøåù) %s äàéâù\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s úéðëúá (%lu 'ñî -e éåèéá ìù %lu 'ñî åú) %s äàéâù\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "äàöîð àì `%s' äöéô÷ úéååú"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s úéðëúá %s úàéø÷á (%s) äàéâù\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "%s õáå÷ úçéúôá äì÷ú"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "%s õáå÷ úçéúôá äì÷ú"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr ""
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+" .%s úáåúëì (bugs) äì÷ú éçååéã çåìùì àð\n"
+" .(``Subject'') ``ïåãðä'' úøåùá ``%s'' äìéî ìåìëì åãéô÷ä àðà\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"äååìî äðéà úéðëúä .øå÷îä éöá÷ úà äàø ,èåøéô øúéì ;úéùôç äðëú äðéä åæ úéðëú\n"
+" äãéîá úàæå ,àéäù úéìëú åæéàì äîàúä åà úåøéçñ íùì àì åìéôà ;úåéøçà áúëá\n"
+" .úàæ øùôàî ÷åçäù\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s úéðëúá %s úàéø÷á (%s) äàéâù\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "%s õáå÷ úçéúôá äì÷ú"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "èìôäî íé÷ìç %d ìù %s-ì äáéúëá (%s) äì÷ú"
+msgstr[1] "èìôäî íé÷ìç %d ìù %s-ì äáéúëá (%s) äì÷ú"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "%s úàéø÷á (%s) äì÷ú"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s úéðëúá %s úàéø÷á (%s) äàéâù\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr ""
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "âåæ-ïá åì ïéàù `{'"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "âåæ-ïá åì ïéàù `{'"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "âåæ-ïá åì ïéàù `{'"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr ""
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr ""
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr ""
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "âåæ-ïá åì ïéàù `{'"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr ""
diff --git a/po/hr.po b/po/hr.po
new file mode 100644
index 0000000..693c5ff
--- /dev/null
+++ b/po/hr.po
@@ -0,0 +1,431 @@
+# Translation of sed to Croatian
+# Copyright (C) 2002 Free Software Foundation, Inc.
+# Denis Lackovic <delacko@fly.srk.fer.hr>, 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 3.02a\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-06-14 15:17-01\n"
+"Last-Translator: Denis Lackovic <delacko@fly.srk.fer.hr>\n"
+"Language-Team: Croatian <lokalizacija@linux.hr>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n==1?0:1);\n"
+"X-Generator: TransDict server\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Višestruki `!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Neočekivani `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Ne mogu koristiti +N ili ~N kao prvu adresu"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Neuparena `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Neočekivana `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Višak znakova nakon komande"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' ne želi nikakve adrese"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ne želi nikakve adrese"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Komentari ne primaju adrese"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Nedostaje naredba"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Naredba koristi samo jednu adresu"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Nezavršeni regularni izraz adrese"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Nezavršena `s' naredba"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Nezavršena `y' naredba"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Nepoznata opcija za `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "višestruke `p' opcije za `s' naredbu"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "višestruke `g' opcije za `s' naredbu"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "višak opcija za za `s' naredbu"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "broj opcija za naredbu `s' ne smije biti nula"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "znakovni nizovi za naredbu y su različitih duljina"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Neispravna uporaba adresnog modifikatora"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Nepoznata naredba:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: datoteka %s redak %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e izraz #%lu, znak %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Ne mogu naći labelu na koju bi trebalo skočiti `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ne mogu čitati %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Ne mogu otvoriti datoteku %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Ne mogu otvoriti datoteku %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Nedostaje prethodni regularni izraz"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"E-mail bug prijave (na engleskom) pošaljite na: %s .\n"
+"Uključite riječ ``%s'' u polju ``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Ovo je slobodni softver; pogledajte kod kako biste doznali uvjete "
+"kopiranja.\n"
+" NEMA garancije;\n"
+"čak ni tvrdnje o ISPLATIVOSTI ili POGODNOSTI ZA NEKU SVRHU.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: ne mogu čitati %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Ne mogu otvoriti datoteku %s"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "Ne mogu upisati %d item%s u %s: %s"
+msgstr[1] "Ne mogu upisati %d item%s u %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "Greška u čitanju na %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: ne mogu čitati %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+#, fuzzy
+msgid "Invalid regular expression"
+msgstr "Nedostaje prethodni regularni izraz"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "Neuparena `{'"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "Neuparena `{'"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "Neuparena `{'"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+#, fuzzy
+msgid "Invalid preceding regular expression"
+msgstr "Nedostaje prethodni regularni izraz"
+
+#: lib/regcomp.c:192
+#, fuzzy
+msgid "Premature end of regular expression"
+msgstr "Nedostaje prethodni regularni izraz"
+
+#: lib/regcomp.c:195
+#, fuzzy
+msgid "Regular expression too big"
+msgstr "Nedostaje prethodni regularni izraz"
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "Neuparena `{'"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Nedostaje prethodni regularni izraz"
diff --git a/po/hu.po b/po/hu.po
new file mode 100644
index 0000000..ff703af
--- /dev/null
+++ b/po/hu.po
@@ -0,0 +1,431 @@
+# Hungarian translation of GNU sed
+# Copyright (C) 2002 Free Software Foundation, Inc.
+# Gábor István <stive@mezobereny.hu>, 2002.
+# Mihály Gyulai <gyulai@fbi.hu>, 2003.
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-10-26 09:28+0100\n"
+"Last-Translator: Mihály Gyulai <gyulai@fbi.hu>\n"
+"Language-Team: Hungarian <translation-team-hu@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Több `!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Váratlan `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Nem lehet használni a +N-t vagy ~N-t első címként"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Nincs párban `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Nincs párban `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Extra karakterek a parancs után "
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "`a', `c' vagy `i' parancs után \\ szükséges"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' nem igényel címzést"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": nem igényel címzést"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Megjegyzésben nem lehet címzés"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Hiányzó parancs"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "A parancs csak egy címzést használ"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Befejezetlen regex cím"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Befejezetlen `s' parancs"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Befejezetlen `y' parancs"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Ismeretlen `s' opció"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "többszörös `p' opció, `s' parancs mellett"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "többszörös `g' opció, `s' parancs mellett"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "többszörös szám opció, `s' parancs mellett"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "a(z) `s' parancs szám opciója nem lehet nulla"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "a(z) `y' parancs szövegeinek hossza különböző"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "a 'sed' program újabb verziójára van szükség"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "A parancs csak egy címzést használ"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Ismeretlen parancs:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fájl %s sor %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e kifejezés #%lu, karakter %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Az ugráshoz (`%s') nem találom a címkét"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: nem lehet olvasni %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Nem lehet megnyitni a(z) %s fájlt: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Nem lehet megnyitni az átmeneti fájlt: %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "hiba az alfolyamatban"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "az `e' opció nincs támogatva"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "az `e' parancs nincs támogatva"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Nincsen előző reguláris kifejezés"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Nem lehet módosítót megadni üres reguláris kifejezéshez"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Hibás hivatkozás (\\%d) a(z) `s' parancs RHS-ére"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R --regexp-perl\n"
+" Perl 5 reguláris kifejezés nyelvtanának használata.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R --regexp-perl\n"
+" Perl 5 reguláris kifejezés nyelvtanának használata.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"A fordítási hibákat kérem a gyulai@fbi.hu címre küldeni. \n"
+"Angolul ide: %s . A levél Tárgy mezejében legyen ott a `%s' szó.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed verzió %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"A GNU 3.02.80-as sed verzión alapszik\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed verzió %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Ez egy ingyenes számítógépes program. A forrásban megtalálhatók a másolás "
+"feltételei.\n"
+"SEMMILYEN garanciát nem vállalunk, még azt sem állítjuk, hogy ez a program\n"
+"KERESKEDELMI CÉLOKRA ALKALMAS vagy HASZNÁLHATÓ EGY ADOTT FELADATRA.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: nem lehet olvasni %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Nem lehet megnyitni a(z) %s fájlt: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "nem tudom a(z) %d elemet ide írni %s: %s"
+msgstr[1] "nem tudom a(z) %d elemeket ide írni %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "olvasási hiba %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: nem lehet olvasni %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sikeres"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Nincs találat"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Hibás reguláris kifejezés"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Érvénytelen összehasonlító karakter"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Érvénytelen karakterosztály-név"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Lezáró visszaperjel"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Érvénytelen vissza-hivatkozás"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Nincs párban [ vagy [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Nincs párban ( vagy \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Nincs párban \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "\\{\\}-nak érvénytelen a tartalma"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Sorozat érvénytelen vége"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Kevés a memória"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Érvénytelen megelőző reguláris kifejezés"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Reguláris kifejezés túl korai vége"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Túl nagy reguláris kifejezés"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Nincs párban ) vagy \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Nincsen előző reguláris kifejezés"
diff --git a/po/id.po b/po/id.po
new file mode 100644
index 0000000..e1e8fae
--- /dev/null
+++ b/po/id.po
@@ -0,0 +1,431 @@
+# translation of sed-4.0.9.id.po to Indonesian
+# sed 4.0.9 (Indonesian)
+# Copyright (C) 1999, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
+# Tedi Heriyanto <tedi_h@gmx.net>, 2002, 2003, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.9\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-04-27 14:56+0700\n"
+"Last-Translator: Tedi Heriyanto <tedi_h@gmx.net>\n"
+"Language-Team: Indonesian <translation-team-id@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+"X-Generator: KBabel 1.3\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "`!' ganda"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "`,' tidak diharapkan"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Tidak dapat menggunakan +N atau ~N sebagai alamat pertama"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' tidak sesuai"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "`}' tidak diharapkan"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Karakter tambahan setelah perintah"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Diharapkan \\ setelah `a', `c' atau `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' tidak menginginkan alamat"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": tidak menginginkan alamat"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Komentar tidak menerima alamat"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Perintah hilang"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Perintah hanya menggunakan satu alamat"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Alamat regex yang tidak selesai"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Perintah `s' tidak selesai"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Perintah `y' tidak selesai"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Opsion tidak dikenal bagi `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "opsion `p' ganda bagi perintah `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "opsion `g' ganda bagi perintah `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "opsion beragam untuk perintah `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "opsion angka untuk perintah `s' tidak boleh nol"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "string untuk perintah y dalam panjang yang berbeda"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "mengharapkan versi baru sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Penggunaan modifier alamat yang tidak valid"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Perintah tidak dikenal:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: file %s baris %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e ekspresi #%lu, char %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Tidak dapat menemukan label untuk melompat ke `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: tidak dapat membaca %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Tidak dapat membuka file %s %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Tidak dapat membuka file temporer %s %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "kesalahan dalam subproses"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "option `e' tidak didukung"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "perintah `e' tidak didukung"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Tidak ada reguler ekspresi sebelumnya"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Tidak dapat menspesifikasikan modified pada regexp kosong"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Referensi tidak valid \\%d pada perintah `s' RHS"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" gunakan sintaks reguler ekspresi Perl 5 dalam skrip.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" gunakan sintaks reguler ekspresi Perl 5 dalam skrip.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Email laporan kesalahan ke: %s \n"
+"Pastikan untuk menyertakan kata \"%s\" di field \"Subject:\".\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versi %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"berdasarkan pada GNU sed versi 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versi %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: tidak dapat membaca %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Tidak dapat membuka file %s %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "tidak dapat menulis %d item ke %s: %s"
+msgstr[1] "tidak dapat menulis %d item ke %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "kesalahan pembacaan pada %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: tidak dapat membaca %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sukses"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Tidak cocok"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Reguler ekspresi tidak valid"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Karakter kolasi tidak valid"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nama kelas karakter tidak valid"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Trailing backslash"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Referensi balik tidak valid"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ atau [^ tidak sesuai"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( atau \\( tidak sesuai"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ tidak sesuai"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Isi \\{\\} tidak valid"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Akhir batas tidak valid"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memori habis"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Reguler ekspresi sebelumnya tidak valid"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Akhir reguler ekspresi prematur"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Reguler ekspresi terlalu besar"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") atau \\) tidak sesuai"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Tidak ada reguler ekspresi sebelumnya"
diff --git a/po/it.po b/po/it.po
new file mode 100644
index 0000000..0742b5d
--- /dev/null
+++ b/po/it.po
@@ -0,0 +1,508 @@
+# traduzione di sed
+# Copyright (C) 1999 Free Software Foundation, Inc.
+# Paolo Bonzini <bonzini@gnu.org>, 2001
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0a\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-11-26 12:44+0100\n"
+"Last-Translator: Paolo Bonzini <bonzini@gnu.org>\n"
+"Language-Team: Italian <tp@lists.linux.it>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n != 1;\n"
+
+# sed/compile.c:166
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "`!' multipli"
+
+# sed/compile.c:167
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "`,' inattesa"
+
+# sed/compile.c:169
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "impossibile usare +N o ~N come primo indirizzo"
+
+# sed/compile.c:170
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "`{' non bilanciata"
+
+# sed/compile.c:171
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "`}' inattesa"
+
+# sed/compile.c:172
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "ci sono altri caratteri dopo il comando"
+
+# sed/compile.c:173
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "atteso \\ dopo `a', `c' o `i'"
+
+# sed/compile.c:174
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' non accetta indirizzi"
+
+# sed/compile.c:175
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": non accetta indirizzi"
+
+# sed/compile.c:176
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "i commenti non accettano indirizzi"
+
+# sed/compile.c:177
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "manca il comando"
+
+# sed/compile.c:178
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "il comando usa solo un indirizzo"
+
+# sed/compile.c:179
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "espressione regolare non terminata nell'indirizzo"
+
+# sed/compile.c:180
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "comando `s' non terminato"
+
+# sed/compile.c:181
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "comando `y' non terminato"
+
+# sed/compile.c:182
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "opzione di `s' sconosciuta"
+
+# sed/compile.c:183
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "opzioni `p' multiple al comando `s'"
+
+# sed/compile.c:184
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "opzioni `g' multiple al comando `s'"
+
+# sed/compile.c:186
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "opzioni numeriche multiple al comando `s'"
+
+# sed/compile.c:188
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "l'opzione numerica del comando `s' non può essere zero"
+
+# sed/compile.c:190
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "le stringhe per il comando `y' hanno lunghezze diverse"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "il carattere delimitatore è multi-byte"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "attesa una versione piu' recente di sed"
+
+# sed/compile.c:178
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "utilizzo non valido dell'indirizzo 0"
+
+# sed/compile.c:1319
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "comando sconosciuto: `%c'"
+
+# sed/compile.c:1340
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: file %s riga %lu: %s\n"
+
+# sed/compile.c:1343
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: espressione -e #%lu, carattere %lu: %s\n"
+
+# sed/compile.c:1543
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "impossibile trovare un'etichetta per il salto a `%s'"
+
+# sed/execute.c:516
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: impossibile leggere %s: %s\n"
+
+# sed/execute.c:675
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "impossibile modificare %s: è un terminale"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "impossibile modificare %s: non è un file normale"
+
+# lib/utils.c:131
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "impossibile aprire il file temporaneo %s: %s"
+
+# sed/execute.c:1003 sed/execute.c:1183
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "errore in un sottoprocesso"
+
+# sed/execute.c:1005
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opzione `e' non supportata"
+
+# sed/execute.c:1185
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "comando `e' non supportato"
+
+# lib/regcomp.c:658 sed/regex.c:47
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "occorre un'espressione regolare precedente"
+
+# sed/regex.c:48
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "non è possibile specificare dei modificatori per l'espressione vuota"
+
+# sed/regex.c:146
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "riferimento non valido \\%d nel secondo membro del comando `s'"
+
+# sed/sed.c:98
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usa la sintassi Perl 5 per le espressioni regolari\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Utilizzo: %s [OPZIONE]... {script-se-nessun-altro-specificato} [input-"
+"file]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" sopprime la stampa automatica del pattern space\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e script, --expression=script\n"
+" aggiunge lo script ai comandi da eseguire\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f file-script, --file=file-script\n"
+" aggiunge il contenuto di file-script ai comandi da "
+"eseguire\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" scrive il risultato sul file originale (facendo una copia\n"
+" se è fornita un'estensione)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" specifica la lunghezza delle linee generate dal comando "
+"`l'\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" disabilita tutte le estensioni GNU.\n"
+
+# sed/sed.c:98
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" usa la sintassi di `egrep' per le espressioni regolari\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" considera i file di input come separati invece che come un\n"
+" unico file lungo.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" carica e visualizza i dati a pezzetti piu' piccoli\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help mostra questo aiuto ed esce\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version stampa le informazioni sulla versione ed esce\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Se non è usata nessuna delle opzioni -e, --expression, -f o --file allora "
+"il\n"
+"primo argomento che non è una opzione sarà usato come lo script sed da\n"
+"interpretare. Tutti gli argomenti rimanenti sono nomi di file di input; se "
+"non\n"
+"sono specificati file di input sarà letto lo standard input.\n"
+"\n"
+
+# sed/sed.c:132
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Segnalare eventuali bug a: %s .\n"
+"Assicurarsi di includere la parola ``%s'' nell'oggetto del messaggio.\n"
+
+# sed/sed.c:255
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versione %s\n"
+
+# sed/sed.c:256
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"basato su GNU sed versione %s\n"
+"\n"
+
+# sed/sed.c:258
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versione %s\n"
+
+# sed/sed.c:260
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Questo è software libero; si veda il sorgente per le condizioni di "
+"copiatura.\n"
+"NON c'è garanzia; neppure di COMMERCIABILITA' o IDONEITA' AD UN PARTICOLARE\n"
+"SCOPO, nei limiti permessi dalla legge.\n"
+
+# sed/execute.c:516
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "impossibile rimuovere %s: %s"
+
+# lib/utils.c:131
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "impossibile aprire il file %s: %s"
+
+# lib/utils.c:161
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "Impossibile scrivere %d elemento su %s: %s"
+msgstr[1] "Impossibile scrivere %d elementi su %s: %s"
+
+# lib/utils.c:176
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "errore di lettura su %s: %s"
+
+# sed/execute.c:516
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "impossibile rinominare %s: %s"
+
+# lib/regcomp.c:179
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Successo"
+
+# lib/regcomp.c:182
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Nessuna corrispondenza trovata"
+
+# lib/regcomp.c:185
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Espressione regolare non valida"
+
+# lib/regcomp.c:188
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Carattere di ordinamento non valido"
+
+# lib/regcomp.c:191
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nome non valido per una classe di caratteri"
+
+# lib/regcomp.c:194
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Barra rovesciata alla fine dell'espressione regolare"
+
+# lib/regcomp.c:197
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Riferimento non valido"
+
+# lib/regcomp.c:200
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "`[' non bilanciata"
+
+# lib/regcomp.c:203
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "`(' o `\\(' non bilanciata"
+
+# lib/regcomp.c:206
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "`\\{' non bilanciata"
+
+# lib/regcomp.c:209
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "numero di ripetizioni specificato tra graffe non valido"
+
+# lib/regcomp.c:212
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Fine dell'intervallo non valida"
+
+# lib/regcomp.c:215
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memoria esaurita"
+
+# lib/regcomp.c:218
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Espressione regolare precedente non valida"
+
+# lib/regcomp.c:221
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Fine prematura dell'espressione regolare"
+
+# lib/regcomp.c:224
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Espressione regolare troppo grande"
+
+# lib/regcomp.c:227
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "`)' o `\\)' non bilanciata"
+
+# lib/regcomp.c:658 sed/regex.c:47
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Occorre un'espressione regolare precedente"
diff --git a/po/ja.po b/po/ja.po
new file mode 100644
index 0000000..47205b2
--- /dev/null
+++ b/po/ja.po
@@ -0,0 +1,444 @@
+# Japanese messages for GNU sed
+# Copyright (C) 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
+# IIDA Yosiaki <iida@gnu.org>, 1999, 2002, 2003, 2004.
+# This file is distributed under the same license as the GNU sed package.
+# Contributed by
+# Yasuyuki Furukawa <yasu@on.cs.keio.ac.jp>, 1999.
+# and taken over on 1999-09-24 by Japanese Team.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU sed 4.0.6\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-04-21 23:53+0900\n"
+"Last-Translator: IIDA Yosiaki <iida@gnu.org>\n"
+"Language-Team: Japanese <translation-team-ja@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=EUC-JP\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Ê£¿ô¤Î`!'¤¬¤¢¤ê¤Þ¤¹"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "ͽ´ü¤µ¤ì¤Ê¤¤`,'¤Ç¤¹"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "+N¤ä~N¤òºÇ½é¤Î¥¢¥É¥ì¥¹¤Ë»ØÄꤹ¤ë¤³¤È¤Ï¤Ç¤­¤Þ¤»¤ó"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{'¤¬Äà¹ç¤¤¤Þ¤»¤ó"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "ͽ´ü¤µ¤ì¤Ê¤¤`}'¤Ç¤¹"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "¥³¥Þ¥ó¥É¤Î¸å¤í¤Ë;·×¤Êʸ»ú¤¬¤¢¤ê¤Þ¤¹"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "\\¤¬¡Öa¡×¡Öc¡×¡Öi¡×¤Î¸å¤Ëͽ´ü¤µ¤ì¤Þ¤¹"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}'¤Ë¥¢¥É¥ì¥¹¤Ï¤¤¤ê¤Þ¤»¤ó"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ":¤Ë¥¢¥É¥ì¥¹¤Ï¤¤¤ê¤Þ¤»¤ó"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "¥³¥á¥ó¥È¤Ï¥¢¥É¥ì¥¹¤ò¼õ¤±ÉÕ¤±¤Þ¤»¤ó"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "¥³¥Þ¥ó¥É¤¬Â­¤ê¤Þ¤»¤ó"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "¥³¥Þ¥ó¥É¤Ï¤Ò¤È¤Ä¤Î¥¢¥É¥ì¥¹¤À¤±¤ò»È¤¤¤Þ¤¹"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "¥¢¥É¥ì¥¹regex¤¬½ªÎ»¤·¤Æ¤¤¤Þ¤»¤ó"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "`s'¥³¥Þ¥ó¥É¤¬½ªÎ»¤·¤Æ¤¤¤Þ¤»¤ó"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "`y'¥³¥Þ¥ó¥É¤¬½ªÎ»¤·¤Æ¤¤¤Þ¤»¤ó"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "`s'¤Ø¤Î¥ª¥×¥·¥ç¥ó¤¬ÉÔÌÀ¤Ç¤¹"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "`s'¥³¥Þ¥ó¥É¤ËÂФ·¤ÆÊ£¿ô¤Î`p'¥ª¥×¥·¥ç¥ó¤¬¤¢¤ê¤Þ¤¹"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "`s'¥³¥Þ¥ó¥É¤ËÂФ·¤ÆÊ£¿ô¤Î`g'¥ª¥×¥·¥ç¥ó¤¬¤¢¤ê¤Þ¤¹"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "`s'¥³¥Þ¥ó¥É¤ËÂФ·¤ÆÊ£¿ô¤Î¿ôÃÍ¥ª¥×¥·¥ç¥ó¤¬¤¢¤ê¤Þ¤¹"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "`s'¥³¥Þ¥ó¥É¤Ø¤Î¿ôÃÍ¥ª¥×¥·¥ç¥ó¤ÏÎí¤Ç¤Ï¤¤¤±¤Þ¤»¤ó"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "y¥³¥Þ¥ó¥É¤Ø¤Îʸ»úÎó¤ÎŤµ¤¬°ã¤¤¤Þ¤¹"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "sed¤Î¿·ÈǤ¬Á°Äó¤Ç¤¹"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "¥³¥Þ¥ó¥É¤Ï¤Ò¤È¤Ä¤Î¥¢¥É¥ì¥¹¤À¤±¤ò»È¤¤¤Þ¤¹"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "ÉÔÌÀ¤Ê¥³¥Þ¥ó¥É¤Ç¤¹:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: ¥Õ¥¡¥¤¥ë %s %lu¹Ô: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e ɽ¸½ #%lu, ʸ»ú¿ô %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "`%s'¤Ø¤Î¥¸¥ã¥ó¥×¤Î¥é¥Ù¥ë¤¬¸«¤Ä¤«¤ê¤Þ¤»¤ó"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s¤òÆɤ߹þ¤á¤Þ¤»¤ó: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "¥Õ¥¡¥¤¥ë%s¤ò³«¤±¤Þ¤»¤ó¤Ç¤·¤¿: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "°ì»þ¥Õ¥¡¥¤¥ë¤ò³«¤±¤Þ¤»¤ó¤Ç¤·¤¿: %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "»Ò¥×¥í¥»¥¹¤Î¥¨¥é¡¼"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "e¥ª¥×¥·¥ç¥ó¤Ï¡¢¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤Þ¤»¤ó"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "e¥³¥Þ¥ó¥É¤Ï¡¢¥µ¥Ý¡¼¥È¤µ¤ì¤Æ¤¤¤Þ¤»¤ó"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "ľÁ°¤ÎÀµµ¬É½¸½¤¬¡¢¤¢¤ê¤Þ¤»¤ó"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "½¤¾þ»Ò¤Ï¡¢¶õ¤ÎÀµµ¬É½¸½¤Ë»ØÄê¤Ç¤­¤Þ¤»¤ó"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "¡Ös¡×¥³¥Þ¥ó¥É¤Î±¦Â¦¤Ë̵¸ú¤Ê\\%d¤Î»²¾È"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" ¥¹¥¯¥ê¥×¥È¤ÇPerl 5¤ÎÀµµ¬É½¸½¹½Ê¸¤ò»È¤¦¡£\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" ¥¹¥¯¥ê¥×¥È¤ÇPerl 5¤ÎÀµµ¬É½¸½¹½Ê¸¤ò»È¤¦¡£\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"ÅŻҥ᡼¥ë¤Ë¤è¤ë¥Ð¥°Êó¹ð¤Î°¸Àè: %s\n"
+"Êó¹ð¤¹¤ëºÝ¤Ë¤Ï``Subject:''¥Õ¥£¡¼¥ë¥É¤Î¤É¤³¤«¤Ë``%s''¤òÆþ¤ì¤Æ¤¯¤À¤µ¤¤¡£\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed %sÈÇ\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"¸¶ºîGNU sed 3.02.80ÈÇ\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+"GNU sed %sÈÇ\n"
+"\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+"\n"
+"ÌõÃí: Èó¾ï¤Ë½ÅÍפÊʸ¾Ï¤Î¤¿¤á¡¢¸¶Ê¸¤ò»Ä¤·¤Æ¤¤¤Þ¤¹¡£\n"
+" -- »²¹ÍÌõ\n"
+"¤³¤ì¤Ï¥Õ¥ê¡¼¡¦¥½¥Õ¥È¥¦¥§¥¢¤Ç¤¹¡£Ê£À½¤Î¾ò·ï¤Ë´Ø¤·¤Æ¤Ï¡¢¥½¡¼¥¹¤ò¤´Í÷¤¯¤À¤µ"
+"¤¤¡£\n"
+"ÊݾڤϰìÀÚ¤¢¤ê¤Þ¤»¤ó¡£±ÄÍøÌÜŪ¤äË¡¤ÇÄê¤á¤é¤ì¤¿ÈϰϤǤÎÆÃÄêÌÜŪ¤Î¤¿¤á¤ÎŬ¹ç"
+"À­\n"
+"¤â¤¢¤ê¤Þ¤»¤ó¡£\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s¤òÆɤ߹þ¤á¤Þ¤»¤ó: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "¥Õ¥¡¥¤¥ë%s¤ò³«¤±¤Þ¤»¤ó¤Ç¤·¤¿: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%d¸Ä¤Î¥¢¥¤¥Æ¥à¤ò%s¤Ø½ñ¤­¹þ¤á¤Þ¤»¤ó¤Ç¤·¤¿: %s"
+msgstr[1] "%d¸Ä¤Î¥¢¥¤¥Æ¥à¤ò%s¤Ø½ñ¤­¹þ¤á¤Þ¤»¤ó¤Ç¤·¤¿: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "%s¤ÎÆɹþ¤ß¥¨¥é¡¼: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s¤òÆɤ߹þ¤á¤Þ¤»¤ó: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "À®¸ù"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "¾È¹ç¤·¤Þ¤»¤ó"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "̵¸ú¤ÊÀµµ¬É½¸½"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "̵¸ú¤Ê¹»¹çʸ»ú"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "̵¸ú¤Êʸ»ú¥¯¥é¥¹Ì¾"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "¸å³¤ÎµÕ¥¹¥é¥Ã¥·¥å"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "̵¸ú¤ÊµÕ»²¾È"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[¤ä[^¤¬Äà¹ç¤¤¤Þ¤»¤ó"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "(¤ä\\(¤¬Äà¹ç¤¤¤Þ¤»¤ó"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{¤¬Äà¹ç¤¤¤Þ¤»¤ó"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "̵¸ú¤Ê\\{\\}¤ÎÆâÍÆ"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "̵¸ú¤ÊÈϰϤνªÃ¼"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "¥á¥â¥ê¡¼¤¬Â­¤ê¤Þ¤»¤ó"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "̵¸ú¤ÊÀè¹ÔÀµµ¬É½¸½"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "ͽ´ü¤»¤ÌÀµµ¬É½¸½¤Î½ªÃ¼"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Â礭²á¤®¤ëÀµµ¬É½¸½"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ")¤ä\\)¤¬°ìÃפ·¤Þ¤»¤ó"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "ľÁ°¤ÎÀµµ¬É½¸½¤¬¡¢¤¢¤ê¤Þ¤»¤ó"
diff --git a/po/ko.po b/po/ko.po
new file mode 100644
index 0000000..81c12a4
--- /dev/null
+++ b/po/ko.po
@@ -0,0 +1,423 @@
+# ko.po -- Korean messages for GNU sed
+# Copyright (C) 2001 Free Software Foundation, Inc.
+# Jong-Hoon Ryu <redhat4u@netian.com>, 2001.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU sed 3.02.80\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2001-10-12 17:26+0900\n"
+"Last-Translator: Jong-Hoon Ryu <redhat4u@netian.com>\n"
+"Language-Team: Korean <ko@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=EUC-KR\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "`!' °¡ Áߺ¹µÇ¾ú½À´Ï´Ù"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "ºÒÇÊ¿äÇÑ `,' °¡ »ç¿ëµÇ°í ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "ù¹ø° ÁÖ¼Ò·Î '+N' ¶Ç´Â '~N' À» »ç¿ëÇÒ ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' °¡ ÀÏÄ¡ÇÏÁö ¾Ê½À´Ï´Ù"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "ºÒÇÊ¿äÇÑ `}' °¡ »ç¿ëµÇ°í ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "¸í·ÉµÚ¿¡ ÇÊ¿ä¾ø´Â ¹®ÀÚµéÀÌ ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' ¿¡ ¾î¶°ÇÑ ÁÖ¼Òµµ ÇÊ¿äÄ¡ ¾Ê½À´Ï´Ù"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ¿¡ ¾î¶°ÇÑ ÁÖ¼Òµµ ÇÊ¿äÄ¡ ¾Ê½À´Ï´Ù"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "ÄÚ¸àÆ®¿¡ ¾î¶°ÇÑ ÁÖ¼Òµµ »ç¿ëÇÒ ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "¸í·ÉÀÌ ÁöÁ¤µÇÁö ¾Ê¾Ò½À´Ï´Ù"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "¸í·É¿¡ ÇϳªÀÇ ÁÖ¼Ò¸¸ »ç¿ëÇÒ ¼ö ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "ÁÖ¼Ò Á¤±ÔÇ¥Çö½Ä Á¾·áµÇÁö ¾Ê¾Ò½À´Ï´Ù"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "`s' ¸í·ÉÀÌ Á¾·áµÇÁö ¾Ê¾Ò½À´Ï´Ù"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "`y' ¸í·ÉÀÌ Á¾·áµÇÁö ¾Ê¾Ò½À´Ï´Ù"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "`s' ¿¡ ¾Ë ¼ö ¾ø´Â ¿É¼ÇÀÌ ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "`s' ¸í·É¿¡ `p' ¿É¼ÇÀÌ Áߺ¹µÇ¾î ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "`s' ¸í·É¿¡ `g' ¿É¼ÇÀÌ Áߺ¹µÇ¾î ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "`s' ¸í·É¿¡ ¼ýÀÚ ¿É¼ÇÀÌ Áߺ¹µÇ¾î ÀÖ½À´Ï´Ù"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "`s' ¸í·ÉÀÇ ¼ýÀÚ ¿É¼Ç¿¡ '0' À» ÁöÁ¤ÇÒ ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "`y' ¸í·ÉÀÇ ¹®ÀÚ¿­ÀÌ ±æÀÌ°¡ ´Ù¸¨´Ï´Ù"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "ÁÖ¼Ò º¯°æÀÚ(modifier)ÀÇ »ç¿ëÀÌ ¿Ã¹Ù¸£Áö ¾Ê½À´Ï´Ù"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "¾Ë ¼ö ¾ø´Â ¸í·É:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: %s ÆÄÀÏÀÇ %lu ¹ø° ÁÙ: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expression #%lu, char %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "`%s' (À¸)·Î Á¡ÇÁÇÒ ·¹À̺íÀ» ãÀ» ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s (À»)¸¦ ÀÐÀ» ¼ö ¾øÀ½: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "%s ÆÄÀÏÀ» ¿­ ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "%s ÆÄÀÏÀ» ¿­ ¼ö ¾ø½À´Ï´Ù"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr ""
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"¹ö±×¸¦ º¸°íÇÒ E-mail ÁÖ¼Ò: %s .\n"
+"``Subject:'' Ç׸ñ¿¡ ¹Ýµå½Ã ``%s'' ´Ü¾î¸¦ Æ÷ÇÔÇØ Áֽñ⠹ٶø´Ï´Ù.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"ÀÌ ÇÁ·Î±×·¥Àº ÀÚÀ¯ ¼ÒÇÁÆ®¿þ¾î ÀÔ´Ï´Ù; ÀÚ¼¼ÇÑ ³»¿ëÀº ÀúÀÛ±Ç ³»¿ëÀÇ ¿ø¹®À»\n"
+"Âü°íÇϽñ⠹ٶø´Ï´Ù. ÀÌ ÇÁ·Î±×·¥Àº ¹ý¿¡ ÀúÃ˵ÇÁö ¾Ê´Â ¹üÀ§¿¡¼­ »ó¾÷ÀûÀ̳ª\n"
+"Ư¼ö ¸ñÀûÀ¸·Î »ç¿ëµÉ °æ¿ì¸¦ Æ÷ÇÔÇÑ ¾î¶°ÇÑ °æ¿ì¿¡µµ º¸ÁõÇÏÁö ¾Ê½À´Ï´Ù.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s (À»)¸¦ ÀÐÀ» ¼ö ¾øÀ½: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "%s ÆÄÀÏÀ» ¿­ ¼ö ¾ø½À´Ï´Ù"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%2$s ¿¡ %1$d Ç׸ñ(item)À» ±â·ÏÇÒ ¼ö ¾ø½À´Ï´Ù: %3$s"
+msgstr[1] "%2$s ¿¡ %1$d Ç׸ñ(item)À» ±â·ÏÇÒ ¼ö ¾ø½À´Ï´Ù: %3$s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "%s ÀÇ ³»¿ë Àб⠿À·ù: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s (À»)¸¦ ÀÐÀ» ¼ö ¾øÀ½: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr ""
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "`{' °¡ ÀÏÄ¡ÇÏÁö ¾Ê½À´Ï´Ù"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "`{' °¡ ÀÏÄ¡ÇÏÁö ¾Ê½À´Ï´Ù"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "`{' °¡ ÀÏÄ¡ÇÏÁö ¾Ê½À´Ï´Ù"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr ""
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr ""
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr ""
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "`{' °¡ ÀÏÄ¡ÇÏÁö ¾Ê½À´Ï´Ù"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr ""
diff --git a/po/nl.po b/po/nl.po
new file mode 100644
index 0000000..3277524
--- /dev/null
+++ b/po/nl.po
@@ -0,0 +1,435 @@
+# Dutch translation of sed.
+# Copyright (C) 2004 Free Software Foundation, Inc.
+# This file is distributed under the same license as the sed package.
+# Elros Cyriatan <cyriatan@fastmail.fm>, 2004.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.9\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-04-09 21:39+0200\n"
+"Last-Translator: Elros Cyriatan <cyriatan@fastmail.fm>\n"
+"Language-Team: Dutch <vertaling@nl.linux.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=n != 1;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Meerdere `!'s"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Onverwachte `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Kan niet +N of ~N als eerste adres gebruiken"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Niet-overeenkomende `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Onverwachte `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Extra tekens na opdracht"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Verwachtte \\ na `a', `c' of `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' wil geen adressen"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": wil geen adressen"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Opmerkingen accepteren geen adres"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Ontbrekende opdracht"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Opdracht gebruikt slechts één adres"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Niet-afgemaakt adres reguliere uitdrukking"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Niet-afgemaakte `s'-opdracht"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Niet-afgemaakte `y'-opdracht"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Onbekende optie voor `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "meerdere `p'-opties voor `s'-opdracht"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "meerdere `g'-opties voor `s'-opdracht"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "meerdere getalopties voor `s'-opdracht"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "getaloptie voor `s'-opdracht mag niet nul zijn"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "tekenreeksen voor `y'-opdracht zijn van verschillende lengte"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "verwachtte een nieuwere versie van sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Opdracht gebruikt slechts één adres"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Onbekende opdracht:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: bestand %s regel %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e uitdrukking #%lu, teken %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Kan label voor sprong naar `%s' niet vinden"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: kan %s niet lezen: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Kon bestand %s niet openen: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Kon tijdelijk bestand %s niet openen: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "fout in deelproces"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "optie `e' niet ondersteund"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "`e'-opdracht niet ondersteund"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Geen eerdere reguliere uitdrukking"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Kan geen veranderaars opgeven op lege reguliere uitdrukking"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Ongeldige verwijzing \\%d op rechterhandzijde van `s'-opdracht"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" de syntaxis van Perl 5 voor reguliere uitdrukkingen "
+"gebruiken in het script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" de syntaxis van Perl 5 voor reguliere uitdrukkingen "
+"gebruiken in het script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Stuur foutrapporten naar: %s .\n"
+"Zorg ervoor dat het woord ``%s'' ergens in het ``Onderwerp:''-veld staat.\n"
+"Rapporteer fouten in de vertalingen bij <vertaling@nl.linux.org>.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versie %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"gebaseerd op GNU sed versie 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versie %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Dit is vrije software; zie de bron voor kopieervoorwaarden. Er is GEEN\n"
+"garantie; zelfs niet voor VERHANDELBAARHEID of GESCHIKTHEID VOOR\n"
+"EEN BEPAALD DOEL, tot het uiterste dat door de wet wordt toegestaan.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: kan %s niet lezen: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Kon bestand %s niet openen: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "kon niet %d item naar %s schrijven: %s"
+msgstr[1] "kon niet %d items naar %s schrijven: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "leesfout op %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: kan %s niet lezen: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Succes"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Geen overeenkomst"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Ongeldige reguliere uitdrukking"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Ongeldig sorteerteken"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Ongeldige tekenklasse naam"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Backslash aan het einde"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Ongeldige terugverwijzing"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Niet-overeenkomende [ of [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Niet-overeenkomende ( of \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Niet-overeenkomende \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ongeldige inhoud van \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Ongeldig bereikeinde"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Geheugen uitgeput"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Ongeldige voorafgaande reguliere uitdrukking"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Vroegtijdig einde van reguliere uitdrukking"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Reguliere uitdrukking te groot"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Niet-overeenkomende ) of \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Geen eerdere reguliere uitdrukking"
diff --git a/po/pl.po b/po/pl.po
new file mode 100644
index 0000000..273ef35
--- /dev/null
+++ b/po/pl.po
@@ -0,0 +1,445 @@
+# Polish translations for GNU sed package.
+# Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+# Wojciech Polak <polak@gnu.org>, 2003, 2004.
+# corrections: Jakub Bogusz <qboosh@pld-linux.org>, 2003.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-08 19:58+0200\n"
+"Last-Translator: Wojciech Polak <polak@gnu.org>\n"
+"Language-Team: Polish <translation-team-pl@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 "
+"|| n%100>=20) ? 1 : 2);\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "wielokrotny znak `!'"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "nieoczekiwany znak `,'"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "nieprawid³owe u¿ycie +N lub ~N jako pierwszego adresu"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "niedopasowany znak `{'"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "nieoczekiwany znak `}'"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "dodatkowe znaki po poleceniu"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "oczekiwano znaku \\ po `a', `c' lub `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' nie chce ¿adnych adresów"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": nie chce ¿adnych adresów"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "komentarze nie akceptuj± ¿adnych adresów"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "brakuje polecenia"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "polecenie u¿ywa tylko jednego adresu"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "niezakoñczony adres wyra¿enia regularnego"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "niezakoñczone polecenie `s'"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "niezakoñczone polecenie `y'"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "nieznana opcja dla polecenia `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "wielokrotne opcje `p' dla polecenia `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "wielokrotne opcje `g' dla polecenia `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "wielokrotne opcje liczbowe dla polecenia `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "opcja liczbowa dla polecenia `s' nie mo¿e byæ zerem"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "³añcuchy dla polecenia `y' s± ró¿nych d³ugo¶ci"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "znak ogranicznika nie jest pojedynczym znakiem-bajtem"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "oczekiwano nowszej wersji programu sed"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "nieprawid³owe u¿ycie adresu linii 0"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "nieznane polecenie: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: plik %s linia %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e wyra¿enie #%lu, znak %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "nie mo¿na znale¼æ etykiety dla skoku do `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: nie mo¿na odczytaæ %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "nie mo¿na edytowaæ %s: plik jest terminalem"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "nie mo¿na edytowaæ %s: to nie jest regularny plik"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "nie mo¿na otworzyæ tymczasowego pliku %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "b³±d w podprocesie"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opcja `e' nie jest wspierana"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "polecenie `e' nie jest wspierane"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "brak poprzedniego wyra¿enia regularnego"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "nie mo¿na wyszczególniæ modyfikatorów w pustym wyra¿eniu regularnym"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "nieprawid³owe odwo³anie \\%d po prawej stronie polecenia `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" u¿ywa w skrypcie wyra¿enia regularne zgodne ze sk³adni± "
+"Perl 5.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"U¿ycie: %s [OPCJE] {skrypt-tylko-wtedy-gdy-¿aden-inny-skrypt} [plik-"
+"wej¶ciowy]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" powstrzymuje automatyczne drukowanie przetwarzanych linii.\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e skrypt, --expression=skrypt\n"
+" dodaje skrypt do poleceñ, które maj± byæ wykonane.\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f plik-skryptowy, --file=plik-skryptowy\n"
+" dodaje zawarto¶æ pliku skryptowego do poleceñ,\n"
+" które maj± byæ wykonane.\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[rozszerzenie], --in-place[=rozszerzenie]\n"
+" edytuje pliki \"w miejscu\" (tworzy kopie zapasowe\n"
+" je¿eli zosta³o podane rozszerzenie).\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" ustala po¿±dan± d³ugo¶æ ³amanych linii dla polecenia `l'.\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" wy³±cza wszystkie rozszerzenia GNU.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" u¿ywa w skrypcie rozszerzonych wyra¿eñ regularnych.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" traktuje pliki jako oddzielne, a nie jako pojedynczy,\n"
+" d³ugi i ci±g³y strumieñ.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" ³aduje minimaln± ilo¶æ danych z plików wej¶ciowych\n"
+" i czê¶ciej oczyszcza bufor wyj¶ciowy.\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help wy¶wietla tê oto pomoc i koñczy pracê.\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version wy¶wietla numer wersji i koñczy pracê.\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Je¿eli nie zostan± podane opcje -e, --expression, -f, lub --file,\n"
+"to wtedy pierwszy argument, który nie jest opcj± linii poleceñ sed,\n"
+"zostanie wziêty jako skrypt sed do przetworzenia. Wszystkie pozosta³e\n"
+"argumenty s± nazwami plików wej¶ciowych; je¿eli nie zostan± podane\n"
+"¿adne pliki wej¶ciowe, to wtedy odczytane zostanie standardowe wej¶cie.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Ewentualne b³êdy prosimy zg³aszaæ na adres: %s\n"
+"W tym celu proszê dodaæ s³owo ``%s'' do tematu listu.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed wersja %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"na podstawie wersji GNU sed %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed wersja %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Niniejszy program jest wolnym oprogramowaniem; warunki kopiowania s± "
+"opisane\n"
+"w ¼ród³ach. Autorzy nie daj± ¯ADNYCH gwarancji, w tym równie¿ gwarancji\n"
+"PRZYDATNO¦CI DO SPRZEDA¯Y LUB DO KONKRETNYCH CELÓW.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "nie mo¿na usun±æ %s: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "nie mo¿na otworzyæ pliku %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "nie mo¿na zapisaæ %d elementu do %s: %s"
+msgstr[1] "nie mo¿na zapisaæ %d elementów do %s: %s"
+msgstr[2] "nie mo¿na zapisaæ %d elementów do %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "b³±d odczytu w %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "nie mo¿na zmieniæ nazwy %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sukces"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Brak dopasowania"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Nieprawid³owe wyra¿enie regularne"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Nieprawid³owy znak porównania"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nieprawid³owa nazwa klasy znaku"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Koñcowy znak backslash"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Nieprawid³owe odwo³anie wsteczne"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Niedopasowany znak [ lub [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Niedopasowany znak ( lub \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Niedopasowany znak \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Nieprawid³owa zawarto¶æ \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Nieprawid³owy koniec zakresu"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Pamiêæ wyczerpana"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Nieprawid³owe poprzedzaj±ce wyra¿enie regularne"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Przedwczesny koniec wyra¿enia regularnego"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Wyra¿enie regularne jest zbyt du¿e"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Niedopasowany znak ) lub \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Brak poprzedniego wyra¿enia regularnego"
diff --git a/po/pt_BR.po b/po/pt_BR.po
new file mode 100644
index 0000000..b9b037d
--- /dev/null
+++ b/po/pt_BR.po
@@ -0,0 +1,433 @@
+# traduções para o português do Brasil das mensagens de erro do sed
+# Copyright (C) 1999 Free Software Foundation, Inc.
+# Aurélio Marinho Jargas <aurelio@conectiva.com.br>, 1999, 2002.
+#
+# Tradução original da versão 4.01:
+# Juan Carlos Castro y Castro <jcastro@vialink.com.br>, 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-11-08 17:44-0300\n"
+"Last-Translator: Aurélio Marinho Jargas <aurelio@verde666.org>\n"
+"Language-Team: Brazilian Portuguese <ldp-br@bazar.conectiva.com.br>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Exclamações `!' múltiplas"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Vírgula `,' inesperada"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Não use +N ou ~N como o primeiro endereço"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' não terminada"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "`}' inesperada"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Há caracteres sobrando após o comando"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Deve haver um escape \\ depois dos comandos `a', `c' e `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' não recebe endereços"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr "`:' não recebe endereços"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Comentários não aceitam endereços"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Falta especificar um comando ao endereço"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Este comando usa apenas um endereço"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "A expressão regular do endereço está inacabada (falta a /)"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Comando `s' inacabado (s/// - faltou delimitador)"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Comando `y' inacabado (y/// - faltou delimitador)"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Opção desconhecida para o comando `s' (s///?)"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "opções `p' múltiplas para o comando `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "opções `g' múltiplas para o comando `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "opções numéricas múltiplas para o comando `s' (s///n)"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "A opção numérica para o comando `s' não pode ser zero (s///0)"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "Os textos para o comando `y' têm tamanhos diferentes (y/abc/z/)"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Uso incorreto do modificador de endereço"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Comando desconhecido:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: arquivo %s linha %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expressão #%lu, caractere %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Não foi possível encontrar a marcação `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: não foi possível ler %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Não foi possível abrir o arquivo %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Não foi possível abrir o arquivo %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "erro no subprocesso"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opção `e' não suportada"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "comando `e' não suportado"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Não há expressão regular anterior"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Não é permitido especificar modificadores numa expressão regular vazia"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Referência inválida \\%d na segunda parte do comando `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usar sintaxe de expressões regulares do Perl 5 no script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" usar sintaxe de expressões regulares do Perl 5 no script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Envie relatórios de erros (em inglês) para: %s .\n"
+"Inclua a palavra ``%s'' no campo ``Assunto:'' ou ``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed versão %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"baseado no GNU sed versão 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versão %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Este programa é Software Livre. Veja os fontes para conhecer as condições\n"
+"de cópia. NÃO há garantias, nem mesmo para os aspectos mercantis ou de\n"
+"atendimento a finalidades específicas, tanto quanto a lei permita.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: não foi possível ler %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Não foi possível abrir o arquivo %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "não foi possível escrever %d item para %s: %s"
+msgstr[1] "não foi possível escrever %d itens para %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "erro de leitura em %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: não foi possível ler %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Sucesso"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Nada encontrado"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expressão regular inválida"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Caractere de ordenação inválido"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nome inválido de classe de caracteres"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Escape \\ no final"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Retrovisor \\n inválido"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ ou [^ não terminado"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( ou \\( não terminado"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ não terminado"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Conteúdo inválido no \\{\\} (permitidos números e vírgula)"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Fim de intervalo (range) inválido"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Falta de memória"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Expressão regular anterior inválida"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Fim prematuro da expressão regular"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Expressão regular grande demais"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") ou \\) inesperado"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Não há expressão regular anterior"
diff --git a/po/ro.po b/po/ro.po
new file mode 100644
index 0000000..024f14e
--- /dev/null
+++ b/po/ro.po
@@ -0,0 +1,435 @@
+# Mesajele în limba românã pentru sed.
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# Acest fiºier este distribuit sub aceeaºi licenþã ca ºi pachetul sed.
+# Laurentiu Buzdugan <buzdugan@voyager.net>, 2003.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-11-22 12:00-0500\n"
+"Last-Translator: Laurentiu Buzdugan <buzdugan@voyager.net>\n"
+"Language-Team: Romanian <translation-team-ro@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "`!'-uri multiple"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "`,' neaºteptat"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Nu se poate folosi +N sau ~N ca prima adresã"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "`{' fãrã pereche"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "`}' neaºteptat"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Extra caractere dupã comandã"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Este aºteptat \\ dupã `a', `c' sau `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' nu vrea nici o adresã"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": nu vrea nici o adresã"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Comentariile nu acceptã nici o adresã"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Comandã absentã"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Comanda foloseºte numai o adresã"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Adresã regex neterminatã"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Comandã `s' neterminatã"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Comandã `y' neterminatã"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Opþiune necunoscutã pentru `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "multiple opþiuni `p' pentru comanda `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "multiple opþiuni `g' pentru comanda `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "multiple opþiuni numerice pentru comanda `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "opþiunea numericã pentru comanda `s' nu poate fi zero"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "ºirurile pentru comanda y au lungimi diferite"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "am aºteptat o versiune mai recentã de sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "utilizare incorectã a adresei de linie 0"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Comandã necunoscutã: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fiºierul %s linia %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e expresia #%lu, caracterul %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Nu pot gãsi eticheta pentru saltul la `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: nu pot citi %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Nu am putut edita %s: este un terminal"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Nu am putut deschide fiºierul temporar %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "eroare în subproces"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "opþiunea `e' nu e suportatã"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "comanda `e' nu e suportatã"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Nici o expresie regularã anterioarã"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Nu se pot specifica modificatori pentru regexp vidã"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Referinþã invalidã \\%d pentru RHS-ul comanzii `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" foloseºte sintaxa expresiilor regulare din Perl 5 în "
+"script.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" foloseºte expresii regulare extinse în "
+"script.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Raportaþi bug-uri prin e-mail la: %s .\n"
+"Fiþi siguri cã includeþi ``%s'' undeva în câmpul ``Subject:''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "versiunea super-sed %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"bazat pe GNU sed versiunea %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed versiunea %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Acesta este software liber; vedeþi codul sursã pentru condiþiile de "
+"copiere.\n"
+"Nu existã NICI o garanþie; nici mãcar pentru VANDABILITATE sau POTRIVIRE \n"
+"PENTRU UN ANUME SCOP, conform legilor în vigoare.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "nu pot ºterge %s: %s"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Nu am putut deschide fiºierul %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "Nu am putut scrie %d articol în %s: %s"
+msgstr[1] "Nu am putut scrie %d articole în %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "eroare citire pentru %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "nu pot redenumi %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Succes"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Nici o potrivire"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Expresie regularã incorectã"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Colaþiune de caractere incorectã"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Nume de clasã de caractere incorect"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Backslash în coadã"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Referinþã înapoi incorectã"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "[ sau [^ fãrã pereche"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "( sau \\( fãrã pereche"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "\\{ fãrã pereche"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Conþinut incorect pentru \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Sfârºit de interval incorect"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Memorie epuizatã"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Expresie regularã precedentã incorectã"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Sfârºit prematur al expresiei regulare"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Expresie regularã prea mare"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ") sau \\) fãrã pereche"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Nici o expresie regularã anterioarã"
diff --git a/po/ru.po b/po/ru.po
new file mode 100644
index 0000000..1cb4ef8
--- /dev/null
+++ b/po/ru.po
@@ -0,0 +1,442 @@
+# Translation of sed-4.1.1.po to Russian
+# Copyright (C) 1998, 2004 Free Software Foundation, Inc.
+# Const Kaplinsky <const@ce.cctpu.edu.ru>, 1998.
+# Pavel Maryanov <acid_jack@ukr.net>, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed-4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-07 17:54+0300\n"
+"Last-Translator: Pavel Maryanov <acid_jack@ukr.net>\n"
+"Language-Team: Russian <ru@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=KOI8-R\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=n>1;\n"
+"X-Generator: KBabel 1.3\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "ÎÅÓËÏÌØËÏ ÓÉÍ×ÏÌÏ× `!'"
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "ÎÅÐÒÅÄ×ÉÄÅÎÎÙÊ ÓÉÍ×ÏÌ `,'"
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "ÉÓÐÏÌØÚÏ×ÁÎÉÅ +N ÉÌÉ ~N × ËÁÞÅÓÔ×Å ÐÅÒ×ÏÇÏ ÁÄÒÅÓÁ ÎÅÄÏÐÕÓÔÉÍÏ"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "ÎÅÐÁÒÎÙÊ ÓÉÍ×ÏÌ `{'"
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "ÎÅÐÒÅÄ×ÉÄÅÎÎÙÊ ÓÉÍ×ÏÌ `}'"
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "ÌÉÛÎÉÅ ÓÉÍ×ÏÌÙ ÐÏÓÌÅ ËÏÍÁÎÄÙ"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "ÏÖÉÄÁÌÁÓØ \\ ÐÏÓÌÅ `a', `c' ÉÌÉ `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' ÎÅ ÄÏÐÕÓËÁÅÔ ÕËÁÚÁÎÉÑ ËÁËÉÈ-ÌÉÂÏ ÁÄÒÅÓÏ×"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr "`:' ÎÅ ÄÏÐÕÓËÁÅÔ ÕËÁÚÁÎÉÑ ËÁËÉÈ-ÌÉÂÏ ÁÄÒÅÓÏ×"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "ËÏÍÍÅÎÔÁÒÉÉ ÎÅ ÄÏÐÕÓËÁÀÔ ÕËÁÚÁÎÉÑ ËÁËÉÈ-ÌÉÂÏ ÁÄÒÅÓÏ×"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "ÏÔÓÕÔÓÔ×ÕÅÔ ËÏÍÁÎÄÁ"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "ËÏÍÁÎÄÁ ÉÓÐÏÌØÚÕÅÔ ÔÏÌØËÏ ÏÄÉÎ ÁÄÒÅÓ"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "ÎÅÚÁ×ÅÒÛÅÎÎÏÅ ÁÄÒÅÓÎÏÅ ÒÅÇÕÌÑÒÎÏÅ ×ÙÒÁÖÅÎÉÅ"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "ÎÅÚÁ×ÅÒÛÅÎÎÁÑ ËÏÍÁÎÄÁ `s'"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "ÎÅÚÁ×ÅÒÛÅÎÎÁÑ ËÏÍÁÎÄÁ `y'"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "ÎÅÉÚ×ÅÓÔÎÙÊ ÍÏÄÉÆÉËÁÔÏÒ Ë `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "ÎÅÓËÏÌØËÏ ÍÏÄÉÆÉËÁÔÏÒÏ× `p' Ó ËÏÍÁÎÄÏÊ `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "ÎÅÓËÏÌØËÏ ÍÏÄÉÆÉËÁÔÏÒÏ× `g' Ó ËÏÍÁÎÄÏÊ `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "ÎÅÓËÏÌØËÏ ÞÉÓÌÏ×ÙÈ ÍÏÄÉÆÉËÁÔÏÒÏ× Ó ËÏÍÁÎÄÏÊ `s'"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "ÞÉÓÌÏ×ÏÊ ÍÏÄÉÆÉËÁÔÏÒ ÄÌÑ ËÏÍÁÎÄÙ `s' ÎÅ ÍÏÖÅÔ ÂÙÔØ ÎÕÌÅ×ÙÍ"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "ÓÔÒÏËÉ ÄÌÑ ËÏÍÁÎÄÙ `y' ÉÍÅÀÔ ÒÁÚÎÕÀ ÄÌÉÎÕ"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "ÓÉÍ×ÏÌ-ÒÁÚÄÅÌÉÔÅÌØ ÎÅ Ñ×ÌÑÅÔÓÑ ÏÄÎÏÂÁÊÔÏ×ÙÍ ÓÉÍ×ÏÌÏÍ"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "ÏÖÉÄÁÌÁÓØ ÂÏÌÅÅ ÎÏ×ÁÑ ×ÅÒÓÉÑ sed"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "ÎÅÄÏÐÕÓÔÉÍÏÅ ÉÓÐÏÌØÚÏ×ÁÎÉÅ ÓÔÒÏËÉ ÁÄÒÅÓÁ 0"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "ÎÅÉÚ×ÅÓÔÎÁÑ ËÏÍÁÎÄÁ: `%c'"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: ÆÁÊÌ %s ÓÔÒÏËÁ %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e ×ÙÒÁÖÅÎÉÅ #%lu, ÓÉÍ×ÏÌ %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÎÁÊÔÉ ÍÅÔËÕ ÄÌÑ ÐÅÒÅÈÏÄÁ Ë `%s'"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: ÎÅ×ÏÚÍÏÖÎÏ ÐÒÏÞÉÔÁÔØ %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÒÅÄÁËÔÉÒÏ×ÁÔØ %s: ÜÔÏ ÔÅÒÍÉÎÁÌ"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÒÅÄÁËÔÉÒÏ×ÁÔØ %s: ÜÔÏ ÎÅ ÏÂÙÞÎÙÊ ÆÁÊÌ"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÏÔËÒÙÔØ ×ÒÅÍÅÎÎÙÊ ÆÁÊÌ %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "ÏÛÉÂËÁ × ÐÏÄÐÒÏÃÅÓÓÅ"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "ÏÐÃÉÑ `e' ÎÅ ÐÏÄÄÅÒÖÉ×ÁÅÔÓÑ"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "ËÏÍÁÎÄÁ `e' ÎÅ ÐÏÄÄÅÒÖÉ×ÁÅÔÓÑ"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "ÎÅÔ ÐÒÅÄÙÄÕÝÅÇÏ ÒÅÇÕÌÑÒÎÏÇÏ ×ÙÒÁÖÅÎÉÑ"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÕËÁÚÁÔØ ÍÏÄÉÆÉËÁÔÏÒÙ × ÐÕÓÔÏÍ ÒÅÇÕÌÑÒÎÏÍ ×ÙÒÁÖÅÎÉÉ"
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "ÎÅÄÏÐÕÓÔÉÍÁÑ ÓÓÙÌËÁ \\%d ÎÁ RHS ËÏÍÁÎÄÙ `s'"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" ÉÓÐÏÌØÚÏ×ÁÎÉÅ × ÓËÒÉÐÔÅ ÓÉÎÔÁËÓÉÓÁ ÒÅÇÕÌÑÒÎÙÈ ×ÙÒÁÖÅÎÉÊ "
+"Perl 5.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"éÓÐÏÌØÚÏ×ÁÎÉÅ: %s [ïðãéñ]... {ÔÏÌØËÏ-ÓËÒÉÐÔ-ÅÓÌÉ-ÎÅÔ-ÄÒÕÇÏÇÏ-ÓËÒÉÐÔÁ} "
+"[×ÈÏÄÎÏÊ-ÆÁÊÌ]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" ÐÏÄÁ×ÌÅÎÉÅ Á×ÔÏÍÁÔÉÞÅÓËÏÇÏ ×Ù×ÏÄÁ ÐÒÏÍÅÖÕÔËÏ×\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e script, --expression=script\n"
+" ÄÏÂÁ×ÌÅÎÉÅ ÓËÒÉÐÔÁ × ÉÓÐÏÌÎÑÅÍÙÅ ËÏÍÁÎÄÙ\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f script-file, --file=script-file\n"
+" ÄÏÂÁ×ÌÅÎÉÅ ÓÏÄÅÒÖÉÍÏÇÏ ÆÁÊÌÁ-ÓËÒÉÐÔÁ × ÉÓÐÏÌÎÑÅÍÙÅ ËÏÍÁÎÄÙ\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[óõææéëó], --in-place[=óõææéëó]\n"
+" ÒÅÄÁËÔÉÒÏ×ÁÎÉÅ ÆÁÊÌÏ× ÎÁ ÍÅÓÔÅ (ÓÏÚÄÁÅÔ ËÏÐÉÀ, ÅÓÌÉ ÕËÁÚÁÎÏ "
+"ÒÁÓÛÉÒÅÎÉÅ)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" ÕËÁÚÁÎÉÅ ÖÅÌÁÅÍÏÊ ÄÌÉÎÙ ÐÅÒÅÎÏÓÉÍÏÊ ÓÔÒÏËÉ ÄÌÑ ËÏÍÁÎÄÙ `l'\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" ÏÔËÌÀÞÅÎÉÅ ×ÓÅÈ ÒÁÓÛÉÒÅÎÉÊ GNU.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" ÉÓÐÏÌØÚÏ×ÁÎÉÅ × ÓËÒÉÐÔÅ ÒÁÓÛÉÒÅÎÎÙÈ ÒÅÇÕÌÑÒÎÙÈ ×ÙÒÁÖÅÎÉÊ.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" ÄÏÐÕÝÅÎÉÅ, ÞÔÏ ÆÁÊÌÙ ÒÁÚÄÅÌÅÎÙ, Á ÎÅ × ×ÉÄÅ ÏÄÎÏÇÏ\n"
+" ÄÌÉÎÎÏÇÏ ÎÅÐÒÅÒÙ×ÎÏÇÏ ÐÏÔÏËÁ.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" ÚÁÇÒÕÚËÁ ÍÉÎÉÍÁÌØÎÏÇÏ ÏÂßÅÍÁ ÄÁÎÎÙÈ ÉÚ ×ÈÏÄÎÙÈ ÆÁÊÌÏ×\n"
+" É ÂÏÌÅÅ ÞÁÓÔÙÊ ÓÂÒÏÓ ÎÁ ÄÉÓË ×ÙÈÏÄÎÙÈ ÂÕÆÅÒÏ×\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help ×Ù×ÏÄ ÜÔÏÊ ÓÐÒÁ×ËÉ É ×ÙÈÏÄ\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version ×Ù×ÏÄ ÉÎÆÏÒÍÁÃÉÉ Ï ×ÅÒÓÉÉ É ×ÙÈÏÄ\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"åÓÌÉ ÏÐÃÉÑ -e, --expression, -f, ÉÌÉ --file ÎÅ ÕËÁÚÁÎÁ, ÔÏÇÄÁ ÐÅÒ×ÙÊ\n"
+"ÎÅÏÐÃÉÏÎÁÌØÎÙÊ ÁÒÇÕÍÅÎÔ ÂÅÒÅÔÓÑ ËÁË ÓËÒÉÐÔ sed ÄÌÑ ÉÎÔÅÒÐÒÅÔÁÃÉÉ. ÷ÓÅ\n"
+"ÏÓÔÁ×ÛÉÅÓÑ ÁÒÇÕÍÅÎÔÙ Ñ×ÌÑÀÔÓÑ ÉÍÅÎÁÍÉ ×ÈÏÄÎÙÈ ÆÁÊÌÏ×; ÅÓÌÉ ×ÈÏÄÎÙÅ\n"
+"ÆÁÊÌÙ ÎÅ ÕËÁÚÁÎÙ, ÔÏÇÄÁ ÞÉÔÁÅÔÓÑ ÓÔÁÎÄÁÒÔÎÙÊ ××ÏÄ.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"ïÔÞÅÔÙ Ï ÏÛÉÂËÁÈ ÏÔÐÒÁ×ÌÑÊÔÅ ÐÏ ÁÄÒÅÓÕ: %s .\n"
+"õÂÅÄÉÔÅÓØ, ÞÔÏ ×ËÌÀÞÉÌÉ ÇÄÅ-ÌÉÂÏ × ÐÏÌÅ ``ôÅÍÁ:'' ÓÌÏ×Ï ``%s''.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed ×ÅÒÓÉÑ %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"ÏÓÎÏ×ÁÎ ÎÁ GNU sed ×ÅÒÓÉÉ %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed ×ÅÒÓÉÑ %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"üÔÏ Ó×ÏÂÏÄÎÏÅ ÐÒÏÇÒÁÍÍÎÏÅ ÏÂÅÓÐÅÞÅÎÉÅ; ÕÓÌÏ×ÉÑ ÅÇÏ ËÏÐÉÒÏ×ÁÎÉÑ ÓÍÏÔÒÉÔÅ ×\n"
+"ÉÓÈÏÄÎÙÈ ÔÅËÓÔÁÈ. îÅ ÐÒÅÄÏÓÔÁ×ÌÑÅÔÓÑ îéëáëïê ÇÁÒÁÎÔÉÉ; ÄÁÖÅ ÇÁÒÁÎÔÉÉ\n"
+"ðòéçïäîïóôé äìñ ðòïäáöé ÉÌÉ ðòéíåîéíïóôé äìñ ëïîëòåôîïê ãåìé, × ÔÏÊ ÍÅÒÅ,\n"
+"× ËÏÔÏÒÏÊ ÜÔÏ ÍÏÖÅÔ ÂÙÔØ ÄÏÐÕÝÅÎÏ ÚÁËÏÎÏÄÁÔÅÌØÓÔ×ÏÍ.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÕÄÁÌÉÔØ %s: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÏÔËÒÙÔØ ÆÁÊÌ %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "ÎÅ×ÏÚÍÏÖÎÏ ÚÁÐÉÓÁÔØ %d ÜÌÅÍÅÎÔ × %s: %s"
+msgstr[1] "ÎÅ×ÏÚÍÏÖÎÏ ÚÁÐÉÓÁÔØ %d ÜÌÅÍÅÎÔÏ× × %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "ÏÛÉÂËÁ ÞÔÅÎÉÑ %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "ÎÅ×ÏÚÍÏÖÎÏ ÐÅÒÅÉÍÅÎÏ×ÁÔØ %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "õÓÐÅÛÎÏ"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "îÅÔ ÓÏÏÔ×ÅÓÔ×ÉÑ"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "îÅÄÏÐÕÓÔÉÍÏÅ ÒÅÇÕÌÑÒÎÏÅ ×ÙÒÁÖÅÎÉÅ"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "îÅÄÏÐÕÓÔÉÍÙÊ ÓÉÍ×ÏÌ ÓÒÁ×ÎÅÎÉÑ"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "îÅÄÏÐÕÓÔÉÍÏÅ ÉÍÑ ÄÌÑ ËÌÁÓÓÁ ÓÉÍ×ÏÌÁ"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "úÁ×ÅÒÛÁÀÝÁÑ ÏÂÒÁÔÎÁÑ ËÏÓÁÑ ÞÅÒÔÁ"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "îÅÄÏÐÕÓÔÉÍÁÑ ÏÂÒÁÔÎÁÑ ÓÓÙÌËÁ"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "îÅÐÁÒÎÙÊ ÓÉÍ×ÏÌ [ ÉÌÉ [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "îÅÐÁÒÎÙÊ ÓÉÍ×ÏÌ ( ÉÌÉ \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "îÅÐÁÒÎÙÊ ÓÉÍ×ÏÌ \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "îÅÄÏÐÕÓÔÉÍÏÅ ÓÏÄÅÒÖÉÍÏÅ × \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "îÅÄÏÐÕÓÔÉÍÏÅ ÏËÏÎÞÁÎÉÅ ÄÉÁÐÁÚÏÎÁ"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "ðÁÍÑÔØ ÉÓÞÅÒÐÁÎÁ"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "îÅÄÏÐÕÓÔÉÍÏÅ ÐÒÅÄÛÅÓÔ×ÕÀÝÅÅ ÒÅÇÕÌÑÒÎÏÅ ×ÙÒÁÖÅÎÉÅ"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "ðÒÅÖÄÅ×ÒÅÍÅÎÎÏÅ ÏËÏÎÞÁÎÉÅ ÒÅÇÕÌÑÒÎÏÇÏ ×ÙÒÁÖÅÎÉÑ"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "òÅÇÕÌÑÒÎÏÅ ×ÙÒÁÖÅÎÉÅ ÓÌÉÛËÏÍ ÂÏÌØÛÏÅ"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "îÅÐÁÒÎÙÊ ÓÉÍ×ÏÌ ) ÉÌÉ \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "îÅÔ ÐÒÅÄÙÄÕÝÅÇÏ ÒÅÇÕÌÑÒÎÏÇÏ ×ÙÒÁÖÅÎÉÑ"
diff --git a/po/sed.pot b/po/sed.pot
new file mode 100644
index 0000000..9f1b67e
--- /dev/null
+++ b/po/sed.pot
@@ -0,0 +1,400 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr ""
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr ""
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr ""
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr ""
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr ""
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr ""
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr ""
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ""
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr ""
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr ""
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr ""
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr ""
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr ""
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr ""
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr ""
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr ""
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr ""
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr ""
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr ""
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr ""
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr ""
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr ""
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr ""
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr ""
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr ""
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr ""
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr ""
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr ""
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr ""
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr ""
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr ""
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] ""
+msgstr[1] ""
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr ""
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr ""
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr ""
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr ""
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr ""
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr ""
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr ""
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr ""
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr ""
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr ""
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr ""
diff --git a/po/sk.po b/po/sk.po
new file mode 100644
index 0000000..9d61079
--- /dev/null
+++ b/po/sk.po
@@ -0,0 +1,435 @@
+# Slovak translations for GNU sed package.
+# Copyright (C) 1999, 2002, 2003 Free Software Foundation, Inc.
+# Marcel Telka <marcel@telka.sk>, 2002, 2003.
+# Miroslav Vasko <vasko@debian.cz>, 1999.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU sed 4.0.8\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-10-25 17:22+0200\n"
+"Last-Translator: Marcel Telka <marcel@telka.sk>\n"
+"Language-Team: Slovak <sk-i18n@lists.linux.sk>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=3; plural= (n==1) ? 1 : (n>=2 && n<=4) ? 2 : 0;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Viacnásobný `!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Neočakávaná `,'"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "+N alebo ~N sa nedá použiť ako prvá adresa"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Nezodpovedajúca `{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Neočakávaná `}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Nadbytočné znaky po príkaze"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Očakávané \\ po `a', `c' alebo `i'"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "`}' nevyžaduje akúkoľvek adresu"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": nechce akúkoľvek adresu"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "V komentári nie je prípustná akákoľvek adresa"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Chýbajúci príkaz"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Príkaz používa iba jednu adresu"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Neukončený regulárny výraz adresy"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Neukončený príkaz `s'"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Neukončený príkaz `y'"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Neznáma voľba pre `s'"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "viacnásobné použitie voľby `p' s príkazom `s'"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "viacnásobné použitie voľby `g' s príkazom `s'"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "príkaz `s' môže mať maximálne jednu číselnú voľbu"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "číselná voľba príkazu `s' nemôže byť nula"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "reťazce pre príkaz `y' majú rôzne dĺžky"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "očakávaná novšia verzia programu sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Príkaz používa iba jednu adresu"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Neznámy príkaz:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: súbor %s, riadok %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e výraz #%lu, znak %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Návestie pre skok na `%s' nie je možné nájsť?"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s nie je možné čítať: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Nebolo možné otvoriť súbor %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Nebolo možné otvoriť dočasný súbor %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "chyba v podprocese"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "voľba `e' nie je podporovaná"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "príkaz `e' nie je podporovaný"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Bez predchádzajúceho regulárneho výrazu"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Nie je možné zadať modifikátory pre prázdny regulárny výraz"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Neplatný odkaz \\%d na `s' príkazu RHS"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" použiť syntax regulárnych výrazov z Perlu 5 v skripte.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" použiť syntax regulárnych výrazov z Perlu 5 v skripte.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Správy o chybách zasielajte na adresu %s (iba anglicky).\n"
+"Prosím vložte slovo ``%s'' niekde do položky ``Predmet:''\n"
+"Komentáre k slovenskému prekladu zasielajte na adresu <sk-i18n@lists.linux."
+"sk>.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed verzia %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"založené na GNU sed verzia 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed verzia %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Toto je voľné programové vybavenie; podmienky pre kopírovanie a "
+"rozširovanie\n"
+"nájdete v zdrojových textoch. Toto programové vybavenie je BEZ záruky,\n"
+"a to aj bez záruky PREDAJNOSTI alebo VHODNOSTI PRE NEJAKÝ KONKRÉTNY ÚČEL.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s nie je možné čítať: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Nebolo možné otvoriť súbor %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "nebolo možné zapísať %d položiek do %s: %s"
+msgstr[1] "nebolo možné zapísať %d položku do %s: %s"
+msgstr[2] "nebolo možné zapísať %d položky do %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "chyba pri čítaní z %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s nie je možné čítať: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Úspech"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Nezodpovedá"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Neplatný regulárny výraz"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Neplatný znak pre porovnávanie"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Neplatný názov triedy znakov"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Ukončovacie opačné lomítko"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Neplatný spätný odkaz"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Nezodpovedajúca [ alebo [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Nezodpovedajúca ( alebo \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Nezodpovedajúca \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Neplatný obsah \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Neplatný koniec rozsahu"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Vyčerpaná pamäť"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Neplatný predchádzajúci regulárny výraz"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Neočakávaný koniec regulárneho výrazu"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regulárny výraz je príliš veľký"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Nezodpovedajúca ) alebo \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Bez predchádzajúceho regulárneho výrazu"
diff --git a/po/sl.po b/po/sl.po
new file mode 100644
index 0000000..eb8d889
--- /dev/null
+++ b/po/sl.po
@@ -0,0 +1,435 @@
+# -*- mode:po; coding:iso-latin-2;-*- Slovenian message catalogue for GNU sed.
+# Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+# Primož Peterlin <primoz.peterlin@biofiz.mf.uni-lj.si>, 2000, 2001, 2002, 2003.
+# $Id: sed-4.0.6.sl.po,v 1.2 2003/04/02 15:20:33 peterlin Exp $
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.6\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2003-04-02 17:20+0200\n"
+"Last-Translator: Primož Peterlin <primoz.peterlin@biofiz.mf.uni-lj.si>\n"
+"Language-Team: Slovenian <translation-team-sl@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-2\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=4; plural=(n%100==1 ? 1 : n%100==2 ? 2 : n%100==3 || n"
+"%100==4 ? 3 : 0);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Večterni klicaji \"!\""
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Nepričakovana vejica \",\""
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Obliki +N in ~N nista dovoljeni kot začetna naslova"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Oklepaj { brez zaklepaja"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Nepričakovan zaklepaj }"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Ukazu sledijo dodatni znaki"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Za ,a`, ,c` ali ,i` se pričakuje \\"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "Zaklepaj } ne zahteva naslova"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ne zahteva naslova"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Komentarji ne sprejemajo naslovov"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Ukaz manjka"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Ukaz uporablja le en naslov"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Regularni izraz z nezaključenim naslovom"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Nezaključen ukaz \"s\""
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Nezaključen ukaz \"y\""
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Neznane izbire pri ukazu \"s\""
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "večterne izbire \"p\" pri ukazu \"s\""
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "večterne izbire \"g\" pri ukazu \"s\""
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "večterne številčne izbire pri ukazu \"s\""
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "številčna izbira pri ukazu \"s\" mora biti neničelna"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "niza pri ukazu \"y\" sta različno dolga"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "pričakovana novejša izdaja programa sed"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Nepravilna uporaba modifikatorja naslova"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Neznan ukaz:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: datoteka %s vrstica %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e izraz #%lu, znak %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Oznake za skok na \"%s\" ni možno najti"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s ni možno prebrati: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Datoteke %s ni mogoče odpreti: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Datoteke %s ni mogoče odpreti: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "Napaka v podprocesu"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "Izbira ,e` ni podprta"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "Ukaz ,e` ni podprt"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Prejšnji regularni izraz manjka"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Določanje modifikatorjev pri praznem regularnem izrazu ni mogoče"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Neveljavni sklic \\%d na desni strani ukaza ,s`"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" dovoli uporabo regularnih izrazov, ki jih podpira Perl 5\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" dovoli uporabo regularnih izrazov, ki jih podpira Perl 5\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Sporočila o napakah pošljite na %s .\n"
+"Poskrbite, da bo nekje v polju ,,Subject`` nastopal izraz ,,%s``.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed, različica %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"na osnovi GNU sed, različica 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed, različica %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"To je prost program; pogoji, pod katerimi ga lahko uporabljate, "
+"razmnožujete\n"
+"in razširjate so navedeni v izvorni kodi. Za program ni NOBENEGA JAMSTVA,\n"
+"niti jamstev USTREZNOSTI ZA PRODAJO ali PRIMERNOSTI ZA UPORABO.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s ni možno prebrati: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Datoteke %s ni mogoče odpreti: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "ni mogoče zapisati %d elementov na %s: %s"
+msgstr[1] "ni mogoče zapisati %d elementa na %s: %s"
+msgstr[2] "ni mogoče zapisati %d elementov na %s: %s"
+msgstr[3] "ni mogoče zapisati %d elementov na %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "napaka pri branju z %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s ni možno prebrati: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Uspešno"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Ni ujemanja"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Neveljavni regularni izraz"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Znaka izven abecede"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Neveljavno ime razreda znakov"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Zaključna obrnjena poševnica"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Neveljavni povratni sklic"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Oklepaj [ ali [^ brez zaklepaja"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Oklepaj ( ali \\( brez zaklepaja"
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Oklepaj \\{ brez zaklepaja"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Neveljavna vsebina \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Neveljavna zgornja meja intervala"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Zmanjkalo pomnilnika"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Neveljaven prejšnji regularni izraz"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Predčasni zaključek regularnega izraza"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Regularni izraz prevelik"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Oklepaj ) ali \\) brez zaklepaja"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Prejšnji regularni izraz manjka"
diff --git a/po/sr.po b/po/sr.po
new file mode 100644
index 0000000..72b7e75
--- /dev/null
+++ b/po/sr.po
@@ -0,0 +1,432 @@
+# Serbian translation of `sed'.
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# This file is distributed under the same license as the `sed' package.
+# Aleksandar Jelenak <jelenak@netlinkplus.net>, 2003.
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.9\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-01-13 22:51-0500\n"
+"Last-Translator: Aleksandar Jelenak <jelenak@netlinkplus.net>\n"
+"Language-Team: Serbian <sr@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : (n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Више „!“"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Неочекиван „,“"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "Не може се користити +N или ~N као прва адреса"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Неспарена „{“"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Неочекивана „}“"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Вишак знакова после наредбе"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "Очекивано \\ после „a“, „c“ или „i“"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "„}“ не захтева икакве адресе"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": не захтева икакве адресе"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Коментари не прихватају икакве адресе"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Недостаје наредба"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Наредба користи само једну адресу"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Незавршена адреса рег. израза"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Незавршена наредба „s“"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Незавршена наредба „y“"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "Непозната опција за „s“"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "више „p“ опција за „s“ наредбу"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "више „g“ опција за „s“ наредбу"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "више бројчаних опција за „s“ наредбу"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "бројчана опција наредбе „s“ не може бити нула"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "ниске за команду „y“ су различите дужине"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "очекивана новија верзија sed-а"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Наредба користи само једну адресу"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Непозната наредба:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: датотека %s ред %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e израз #%lu, знак %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "Не могу да нађем ознаку за скок на „%s“"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: не може читати %s: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "Не може се отворити датотека %s: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Не могу отворити привремену датотеку %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "грешка у потпроцесу"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "опција „e“ није подржана"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "наредба „e“ није подржана"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Без претходног регуларног израза"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Не може се навести измењивач празном рег. изразу"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "Неисправна референца \\%d на десној страни наредбе „s“"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+"                 користи у спису синтаксу Перла 5 за регуларне изразе.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+"                 користи у спису синтаксу Перла 5 за регуларне изразе.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Епошта за пријаву грешака: %s .\n"
+"Постарајте се да укључите реч „%s“ негде у „Subject:“ пољу.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed верзија %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"засновано на ГНУ sed верзија 3.02.80\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "ГНУ sed верзија %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Ово је слободан софтвер; видети изворни кôд за услове умножавања. БЕЗ "
+"икакве\n"
+"гаранције; чак ни о ТРЖИШНОСТИ или ИСПУЊАВАЊУ ОДРЕЂЕНЕ ПОТРЕБЕ.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: не може читати %s: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "Не може се отворити датотека %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "неуспешан запис %d ставке на %s: %s"
+msgstr[1] "неуспешан запис %d ставке на %s: %s"
+msgstr[2] "неуспешан запис %d ставки на %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "грешка учитавања на %s: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: не може читати %s: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Успех"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Без поклапања"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Неисправни регуларни израз"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Неисправни знак прикупљања"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Неисправно име класе знакова"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Пратећа обрнута коса црта"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Неисправна повратна референца"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Неспарено [ или [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Неспарено ( или \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Неспарено \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Неисправни садржај у \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Неисправни крај опсега"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Меморија исцрпљена"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Неисправан претходећи регуларни израз"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Преран крај регуларног израза"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Регуларни израз сувише велик"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Неспарено ) или \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Без претходног регуларног израза"
diff --git a/po/sv.po b/po/sv.po
new file mode 100644
index 0000000..111e676
--- /dev/null
+++ b/po/sv.po
@@ -0,0 +1,446 @@
+# Swedish messages for sed.
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Christian Rose <menthos@menthos.com>, 1999, 2000, 2001, 2002, 2003, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.1.1\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-07-11 11:45+0200\n"
+"Last-Translator: Christian Rose <menthos@menthos.com>\n"
+"Language-Team: Swedish <sv@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=iso-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: sed/compile.c:162
+msgid "multiple `!'s"
+msgstr "flera \"!\""
+
+#: sed/compile.c:163
+msgid "unexpected `,'"
+msgstr "oväntat \",\""
+
+#: sed/compile.c:164
+msgid "invalid usage of +N or ~N as first address"
+msgstr "ogiltig användning av +N eller ~N som första adress"
+
+#: sed/compile.c:165
+msgid "unmatched `{'"
+msgstr "obalanserad \"{\""
+
+#: sed/compile.c:166
+msgid "unexpected `}'"
+msgstr "oväntad \"}\""
+
+#: sed/compile.c:167
+msgid "extra characters after command"
+msgstr "extra tecken efter kommandot"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "\\ förväntades efter \"a\", \"c\" eller \"i\""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "\"}\" vill inte ha några adresser"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": vill inte ha några adresser"
+
+#: sed/compile.c:171
+msgid "comments don't accept any addresses"
+msgstr "kommentarer accepterar inga adresser"
+
+#: sed/compile.c:172
+msgid "missing command"
+msgstr "kommando saknas"
+
+#: sed/compile.c:173
+msgid "command only uses one address"
+msgstr "kommandot använder endast en adress"
+
+#: sed/compile.c:174
+msgid "unterminated address regex"
+msgstr "oavslutat reguljärt uttryck för adress"
+
+#: sed/compile.c:175
+msgid "unterminated `s' command"
+msgstr "oavslutat \"s\"-kommando"
+
+#: sed/compile.c:176
+msgid "unterminated `y' command"
+msgstr "oavslutat \"y\"-kommando"
+
+#: sed/compile.c:177
+msgid "unknown option to `s'"
+msgstr "flaggan okänd för \"s\""
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "flera \"p\"-flaggor till \"s\"-kommandot"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "flera \"g\"-flaggor till \"s\"-kommandot"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "flera sifferflaggor till \"s\"-kommandot"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "sifferflagga till kommandot \"s\" får inte vara noll"
+
+#: sed/compile.c:182
+msgid "strings for `y' command are different lengths"
+msgstr "strängarna för kommandot \"y\" är olika långa"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr "avgränsningstecknet är inte en ensam byte"
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "nyare version av sed förväntades"
+
+#: sed/compile.c:185
+msgid "invalid usage of line address 0"
+msgstr "felaktig användning av radadress 0"
+
+#: sed/compile.c:186
+#, c-format
+msgid "unknown command: `%c'"
+msgstr "okänt kommando: \"%c\""
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: fil %s rad %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e uttryck #%lu, tecken %lu: %s\n"
+
+#: sed/compile.c:1644
+#, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "kan inte hitta etiketten för hopp till \"%s\""
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: kan inte läsa %s: %s\n"
+
+#: sed/execute.c:672
+#, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "kunde inte redigera %s: är en terminal"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr "kunde inte redigera %s: inte en vanlig fil"
+
+#: sed/execute.c:683 lib/utils.c:196
+#, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "kunde inte öppna temporära filen %s: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "fel i underprocess"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "flaggan \"e\" stöds inte"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "kommandot \"e\" stöds inte"
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr "inget tidigare reguljärt uttryck"
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr "kan inte ange modifierare på tomt reguljärt uttryck"
+
+# Kommentar från Jan Djärv:
+# Jag antar RHS står för "right hand side". Man kan då säga
+# "... kommandots högersida"
+#
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "ogiltig referens \\%d på \"s\"-kommandots högersida"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" använd Perl 5:s syntax för reguljära uttryck i skriptet.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+"Användning: %s [FLAGGA]... {skript-endast-om-inga-andra} [indatafil]...\n"
+"\n"
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+" -n, --quiet, --silent\n"
+" förhindrar automatisk utskrift av mönsterutrymme\n"
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+" -e skript, --expression=skript\n"
+" lägg till skript till de kommandon som ska utföras\n"
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+" -f skriptfil, --file=skriptfil\n"
+" lägg till innehållet i skriptfil till de kommandon som ska\n"
+" utföras\n"
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+" -i[ÄNDELSE], --in-place[=ÄNDELSE]\n"
+" redigera filer på plats (skapar säkerhetskopia om ändelse\n"
+" tillhandahålls)\n"
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+" -l N, --line-length=N\n"
+" ange önskad radbrytningslängd för \"l\"-kommandot\n"
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+" --posix\n"
+" inaktivera alla GNU-utökningar.\n"
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -r, --regexp-extended\n"
+" använd utökade reguljära uttryck i skriptet.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+" -s, --separate\n"
+" betrakta filer som separata istället för som en\n"
+" kontinuerlig lång dataström.\n"
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+" -u, --unbuffered\n"
+" läs in minimala mängder data från indatafilerna och töm\n"
+" utdatabufferterna oftare\n"
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr " --help visa denna hjälptext och avsluta\n"
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr " --version visa versionsinformation och avsluta\n"
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+"\n"
+"Om ingen av flaggorna -e, --expression, -f, eller --file ges, blir det\n"
+"första argumentet som inte är en flagga det sed-skript som tolkas. Alla\n"
+"återstående argument är namn på indatafiler. Om inga indatafiler är angivna\n"
+"läses standard in.\n"
+"\n"
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Rapportera fel till: %s .\n"
+"Ange ordet \"%s\" på något ställe i \"Ärende:\"-fältet.\n"
+"Skicka anmärkningar på översättningen till <sv@li.org>.\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed version %s\n"
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"baserad på GNU sed version %s\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed version %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"(Följande text är en informell översättning som enbart tillhandahålls\n"
+" i informativt syfte. För alla juridiska tolkningar gäller den engelska\n"
+" originaltexten.)\n"
+"Det här är fri programvara; se källkoden angående villkor för kopiering.\n"
+"Det finns INGEN garanti; inte ens för SÄLJBARHET eller LÄMPLIGHET FÖR NÅGOT\n"
+"SPECIELLT ÄNDAMÅL, i den omfattning som medges av gällande lag.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, c-format
+msgid "cannot remove %s: %s"
+msgstr "kan inte ta bort %s: %s"
+
+#: lib/utils.c:143
+#, c-format
+msgid "couldn't open file %s: %s"
+msgstr "kunde inte öppna filen %s: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "kunde inte skriva %d objekt till %s: %s"
+msgstr[1] "kunde inte skriva %d objekt till %s: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "läsfel vid %s: %s"
+
+#: lib/utils.c:341
+#, c-format
+msgid "cannot rename %s: %s"
+msgstr "kan inte byta namn på %s: %s"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Lyckades"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Ingen träff"
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Ogiltigt reguljärt uttryck"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Ogiltigt sorteringstecken"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Ogiltigt teckenklassnamn"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Eftersläpande omvänt snedstreck"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Ogiltig bakåtreferens"
+
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Obalanserad [ eller [^"
+
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Obalanserad ( eller \\("
+
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Obalanserad \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "Ogiltigt innehåll i \\{\\}"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Ogiltigt intervallslut"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Minnet slut"
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Ogiltigt föregående reguljärt uttryck"
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "För tidigt slut på reguljärt uttryck"
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Reguljärt uttryck för stort"
+
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Obalanserad ) eller \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Inget tidigare reguljärt uttryck"
diff --git a/po/tr.po b/po/tr.po
new file mode 100644
index 0000000..a74ed33
--- /dev/null
+++ b/po/tr.po
@@ -0,0 +1,439 @@
+# translation of sed-4.0.9.tr.po to Turkish
+# Copyright (C) 2003 Free Software Foundation, Inc.
+# Deniz Akkus Kanca <deniz@arayan.com>, 2001,2003, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 4.0.9\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2004-05-19 18:06+0300\n"
+"Last-Translator: Deniz Akkus Kanca <deniz@arayan.com>\n"
+"Language-Team: Turkish <gnu-tr-u12a@lists.sourceforge.net>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: KBabel 1.0.2\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "Birden fazla '!'"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "Beklenmeyen ','"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "İlk adres olarak +N veya ~N kullanılamaz"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "Eşleşmeyen '{'"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "Beklenmeyen '}'"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "Komuttan sonra fazla karakterler var"
+
+#: sed/compile.c:168
+#, fuzzy
+msgid "expected \\ after `a', `c' or `i'"
+msgstr "`a', `c' veya `i' sonrası \\ beklendi"
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "'}' için adres istenmez"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": için hiç adres istenmez"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "Açıklamalarda adres kabul edilmez"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "Komut eksik"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "Komutta yalnızca tek adres kullanılır"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "Sonlandırılmamış adres düzenli ifadesi"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "Sonlandırılmamış 's' komutu"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "Sonlandırılmamış 'y' komutu"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "`s' komutuna bilinmeyen seçenek verilmiş"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "`s' komutuna birden fazla `p' seçeneği verilmiş"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "`s' komutuna birden fazla `g' seçeneği verilmiş"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "`s' komutuna birden fazla sayı seçeneği verilmiş"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "`s' komutuna verilen sayı seçeneği sıfır olamaz"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "y komutu için dizgeler değişik uzunluklarda"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr "sed'in daha yeni bir sürümü beklendi"
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "Adres değiştirici hatalı kullanılmış"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "Bilinmeyen komut:"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s: dosya %s satır %lu: %s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s: -e ifade #%lu, harf %lu: %s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "`%s'e atlamak için etiket bulunamıyor"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s: %s okunamıyor: %s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "%d sayıda öğe %s'e yazılamadı: %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "Geçici dosya %s açılamadı: %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr "altsüreçte hata"
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr "`e' seçeneği desteklenmiyor"
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr "`e' komutu desteklenmiyor"
+
+#: sed/regexp.c:39
+#, fuzzy
+msgid "no previous regular expression"
+msgstr "Daha önce düzenli ifade yok"
+
+#: sed/regexp.c:40
+#, fuzzy
+msgid "cannot specify modifiers on empty regexp"
+msgstr "Boş düzenli ifadeye değiştirici atanamaz"
+
+#: sed/regexp.c:134
+#, fuzzy, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr "`s' komutunun RHS'sinde geçersiz \\%d referansı"
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" betikte Perl 5'in düzenli ifade sözdizimini kullanır.\n"
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+#, fuzzy
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+" -R, --regexp-perl\n"
+" betikte Perl 5'in düzenli ifade sözdizimini kullanır.\n"
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"Yazılım hatalarını %s adresine, çeviri hatalarını \n"
+"<gnu-tr-u12a@lists.sourceforge.net> adresine bildirin. \n"
+"``%s'' sözcüğünün Konu başlığında yer almasına dikkat edin. \n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr "super-sed sürüm %s\n"
+
+#: sed/sed.c:272
+#, fuzzy, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+"GNU sed sürümü 3.02.80 temel alınmıştır\n"
+"\n"
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr "GNU sed sürümü %s\n"
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+"%s\n"
+"Bu serbest yazılımdır; kopyalama koşulları için kaynak koduna bakınız.\n"
+"Hiçbir garantisi yoktur; hatta SATILABİLİRLİĞİ veya HERHANGİ BİR AMACA\n"
+"UYGUNLUĞU için bile garanti verilmez.\n"
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s: %s okunamıyor: %s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "%s dosyası açılamadı: %s"
+
+#: lib/utils.c:220
+#, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "%d sayıda öğe %s'e yazılamadı: %s"
+msgstr[1] "%d sayıda öğe %s'e yazılamadı: %s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "%s'de okuma hatası: %s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s: %s okunamıyor: %s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr "Başarılı"
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr "Eşleşme bulunamadı"
+
+#
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr "Hatalı düzenli ifade"
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr "Hatalı birleştirme karakteri"
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr "Hatalı karakter sınıf ismi"
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr "Sonda fazla gerikesme var"
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr "Hatalı geri referans"
+
+#
+#: lib/regcomp.c:171
+msgid "Unmatched [ or [^"
+msgstr "Eşleşmeyen [ veya [^"
+
+#
+#: lib/regcomp.c:174
+msgid "Unmatched ( or \\("
+msgstr "Eşleşmeyen ( veya \\("
+
+#
+#: lib/regcomp.c:177
+msgid "Unmatched \\{"
+msgstr "Eşleşmeyen \\{"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr "\\{\\} içeriği hatalı"
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr "Geçersiz kapsam sonu"
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr "Bellek tükendi"
+
+#
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr "Bir önceki düzenli ifade hatalı"
+
+#
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr "Düzenli ifade erken sonlandı"
+
+#
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr "Düzenli ifade fazla büyük"
+
+#
+#: lib/regcomp.c:198
+msgid "Unmatched ) or \\)"
+msgstr "Eşleşmeyen ) veya \\)"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr "Daha önce düzenli ifade yok"
diff --git a/po/zh_CN.po b/po/zh_CN.po
new file mode 100644
index 0000000..2a5ef9a
--- /dev/null
+++ b/po/zh_CN.po
@@ -0,0 +1,419 @@
+# Simplified Chinese messages for sed.
+# Copyright (C) 2002 Free Software Foundation, Inc.
+# Wang Li <charles@linux.net.cn>, 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sed 3.02.80\n"
+"POT-Creation-Date: 2004-08-21 20:46+0200\n"
+"PO-Revision-Date: 2002-08-18 11:11+0800\n"
+"Last-Translator: Wang Li <charles@linux.net.cn>\n"
+"Language-Team: Chinese (simplified) <i18n-translation@lists.linux.net.cn>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=gb2312\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Report-Msgid-Bugs-To: \n"
+
+#: sed/compile.c:162
+#, fuzzy
+msgid "multiple `!'s"
+msgstr "¶à¸ö¡°!¡±"
+
+#: sed/compile.c:163
+#, fuzzy
+msgid "unexpected `,'"
+msgstr "ÒâÍâµÄ¡°,¡±"
+
+#: sed/compile.c:164
+#, fuzzy
+msgid "invalid usage of +N or ~N as first address"
+msgstr "ÎÞ·¨½« +N »ò ~N ×÷ΪµÚÒ»¸öµØÖ·"
+
+#: sed/compile.c:165
+#, fuzzy
+msgid "unmatched `{'"
+msgstr "δƥÅäµÄ¡°{¡±"
+
+#: sed/compile.c:166
+#, fuzzy
+msgid "unexpected `}'"
+msgstr "δƥÅäµÄ¡°}¡±"
+
+#: sed/compile.c:167
+#, fuzzy
+msgid "extra characters after command"
+msgstr "ÃüÁîºóº¬ÓжàÓàµÄ×Ö·û"
+
+#: sed/compile.c:168
+msgid "expected \\ after `a', `c' or `i'"
+msgstr ""
+
+#: sed/compile.c:169
+msgid "`}' doesn't want any addresses"
+msgstr "¡°}¡±²»ÐèÒªÈκεØÖ·"
+
+#: sed/compile.c:170
+msgid ": doesn't want any addresses"
+msgstr ": ²»ÐèÒªÈκεØÖ·"
+
+#: sed/compile.c:171
+#, fuzzy
+msgid "comments don't accept any addresses"
+msgstr "×¢ÊͲ»½ÓÊÜÈκεØÖ·"
+
+#: sed/compile.c:172
+#, fuzzy
+msgid "missing command"
+msgstr "ÒÅ©ÃüÁî"
+
+#: sed/compile.c:173
+#, fuzzy
+msgid "command only uses one address"
+msgstr "ÃüÁîֻʹÓÃÒ»¸öµØÖ·"
+
+#: sed/compile.c:174
+#, fuzzy
+msgid "unterminated address regex"
+msgstr "δÖÕÖ¹µÄµØÖ·³£¹æ±í´ïʽ"
+
+#: sed/compile.c:175
+#, fuzzy
+msgid "unterminated `s' command"
+msgstr "δÖÕÖ¹µÄ¡°s¡±ÃüÁî"
+
+#: sed/compile.c:176
+#, fuzzy
+msgid "unterminated `y' command"
+msgstr "δÖÕÖ¹µÄ¡°y¡±ÃüÁî"
+
+#: sed/compile.c:177
+#, fuzzy
+msgid "unknown option to `s'"
+msgstr "¡°s¡±µÄδ֪ѡÏî"
+
+#: sed/compile.c:178
+msgid "multiple `p' options to `s' command"
+msgstr "¶à¸ö¡°s¡±ÃüÁîµÄÑ¡Ïî¡°p¡±"
+
+#: sed/compile.c:179
+msgid "multiple `g' options to `s' command"
+msgstr "¶à¸ö¡°s¡±ÃüÁîµÄÑ¡Ïî¡°g¡±"
+
+#: sed/compile.c:180
+msgid "multiple number options to `s' command"
+msgstr "¶à¸ö¡°s¡±ÃüÁîµÄÊýֵѡÏî"
+
+#: sed/compile.c:181
+msgid "number option to `s' command may not be zero"
+msgstr "¡°s¡±ÃüÁîµÄÊýֵѡÏî²»ÄÜΪÁã"
+
+#: sed/compile.c:182
+#, fuzzy
+msgid "strings for `y' command are different lengths"
+msgstr "y ÃüÁîµÄ×Ö·û´®³¤¶È²»Í¬"
+
+#: sed/compile.c:183
+msgid "delimiter character is not a single-byte character"
+msgstr ""
+
+#: sed/compile.c:184
+msgid "expected newer version of sed"
+msgstr ""
+
+#: sed/compile.c:185
+#, fuzzy
+msgid "invalid usage of line address 0"
+msgstr "·Ç·¨Ê¹ÓõØÖ·ÐÞÊηû"
+
+#: sed/compile.c:186
+#, fuzzy, c-format
+msgid "unknown command: `%c'"
+msgstr "δ֪µÄÃüÁ"
+
+#: sed/compile.c:209
+#, c-format
+msgid "%s: file %s line %lu: %s\n"
+msgstr "%s£ºÎļþ %s Ðкţº%lu£º%s\n"
+
+#: sed/compile.c:212
+#, c-format
+msgid "%s: -e expression #%lu, char %lu: %s\n"
+msgstr "%s£º-e ±í´ïʽ #%lu£¬×Ö·û %lu£º%s\n"
+
+#: sed/compile.c:1644
+#, fuzzy, c-format
+msgid "can't find label for jump to `%s'"
+msgstr "ÎÞ·¨ÎªÄ¿µÄΪ¡°%s¡±µÄÌøתÕÒµ½±êÇ©"
+
+#: sed/execute.c:649
+#, c-format
+msgid "%s: can't read %s: %s\n"
+msgstr "%s£ºÎÞ·¨¶ÁÈ¡ %s£º%s\n"
+
+#: sed/execute.c:672
+#, fuzzy, c-format
+msgid "couldn't edit %s: is a terminal"
+msgstr "ÎÞ·¨´ò¿ªÎļþ %s"
+
+#: sed/execute.c:676
+#, c-format
+msgid "couldn't edit %s: not a regular file"
+msgstr ""
+
+#: sed/execute.c:683 lib/utils.c:196
+#, fuzzy, c-format
+msgid "couldn't open temporary file %s: %s"
+msgstr "ÎÞ·¨´ò¿ªÎļþ %s"
+
+#: sed/execute.c:1207 sed/execute.c:1388
+msgid "error in subprocess"
+msgstr ""
+
+#: sed/execute.c:1209
+msgid "option `e' not supported"
+msgstr ""
+
+#: sed/execute.c:1390
+msgid "`e' command not supported"
+msgstr ""
+
+#: sed/regexp.c:39
+msgid "no previous regular expression"
+msgstr ""
+
+#: sed/regexp.c:40
+msgid "cannot specify modifiers on empty regexp"
+msgstr ""
+
+#: sed/regexp.c:134
+#, c-format
+msgid "invalid reference \\%d on `s' command's RHS"
+msgstr ""
+
+#: sed/sed.c:96
+msgid ""
+" -R, --regexp-perl\n"
+" use Perl 5's regular expressions syntax in the script.\n"
+msgstr ""
+
+#: sed/sed.c:101
+#, c-format
+msgid ""
+"Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:105
+msgid ""
+" -n, --quiet, --silent\n"
+" suppress automatic printing of pattern space\n"
+msgstr ""
+
+#: sed/sed.c:107
+msgid ""
+" -e script, --expression=script\n"
+" add the script to the commands to be executed\n"
+msgstr ""
+
+#: sed/sed.c:109
+msgid ""
+" -f script-file, --file=script-file\n"
+" add the contents of script-file to the commands to be "
+"executed\n"
+msgstr ""
+
+#: sed/sed.c:111
+msgid ""
+" -i[SUFFIX], --in-place[=SUFFIX]\n"
+" edit files in place (makes backup if extension supplied)\n"
+msgstr ""
+
+#: sed/sed.c:113
+msgid ""
+" -l N, --line-length=N\n"
+" specify the desired line-wrap length for the `l' command\n"
+msgstr ""
+
+#: sed/sed.c:115
+msgid ""
+" --posix\n"
+" disable all GNU extensions.\n"
+msgstr ""
+
+#: sed/sed.c:117
+msgid ""
+" -r, --regexp-extended\n"
+" use extended regular expressions in the script.\n"
+msgstr ""
+
+#: sed/sed.c:120
+msgid ""
+" -s, --separate\n"
+" consider files as separate rather than as a single "
+"continuous\n"
+" long stream.\n"
+msgstr ""
+
+#: sed/sed.c:123
+msgid ""
+" -u, --unbuffered\n"
+" load minimal amounts of data from the input files and "
+"flush\n"
+" the output buffers more often\n"
+msgstr ""
+
+#: sed/sed.c:126
+msgid " --help display this help and exit\n"
+msgstr ""
+
+#: sed/sed.c:127
+msgid " --version output version information and exit\n"
+msgstr ""
+
+#: sed/sed.c:128
+msgid ""
+"\n"
+"If no -e, --expression, -f, or --file option is given, then the first\n"
+"non-option argument is taken as the sed script to interpret. All\n"
+"remaining arguments are names of input files; if no input files are\n"
+"specified, then the standard input is read.\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:134
+#, c-format
+msgid ""
+"E-mail bug reports to: %s .\n"
+"Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"
+msgstr ""
+"½«´íÎ󱨸æͨ¹ýµç×ÓÓʼþ·¢Ë͵½£º%s .\n"
+"ÇëÎñ±Ø½«µ¥´Ê¡°%s¡±·ÅÔÚ¡°Subject:¡±ÓòµÄij´¦¡£\n"
+
+#: sed/sed.c:271
+#, c-format
+msgid "super-sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:272
+#, c-format
+msgid ""
+"based on GNU sed version %s\n"
+"\n"
+msgstr ""
+
+#: sed/sed.c:274
+#, c-format
+msgid "GNU sed version %s\n"
+msgstr ""
+
+#: sed/sed.c:276
+#, c-format
+msgid ""
+"%s\n"
+"This is free software; see the source for copying conditions. There is NO\n"
+"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+"to the extent permitted by law.\n"
+msgstr ""
+
+#: lib/utils.c:98 lib/utils.c:336
+#, fuzzy, c-format
+msgid "cannot remove %s: %s"
+msgstr "%s£ºÎÞ·¨¶ÁÈ¡ %s£º%s\n"
+
+#: lib/utils.c:143
+#, fuzzy, c-format
+msgid "couldn't open file %s: %s"
+msgstr "ÎÞ·¨´ò¿ªÎļþ %s"
+
+#: lib/utils.c:220
+#, fuzzy, c-format
+msgid "couldn't write %d item to %s: %s"
+msgid_plural "couldn't write %d items to %s: %s"
+msgstr[0] "ÎÞ·¨½« %d ¸öÏîĿдÈë %s£º%s"
+msgstr[1] "ÎÞ·¨½« %d ¸öÏîĿдÈë %s£º%s"
+
+#: lib/utils.c:235 lib/utils.c:251
+#, c-format
+msgid "read error on %s: %s"
+msgstr "¶ÁÈ¡ %s ³ö´í£º%s"
+
+#: lib/utils.c:341
+#, fuzzy, c-format
+msgid "cannot rename %s: %s"
+msgstr "%s£ºÎÞ·¨¶ÁÈ¡ %s£º%s\n"
+
+#: lib/regcomp.c:150
+msgid "Success"
+msgstr ""
+
+#: lib/regcomp.c:153
+msgid "No match"
+msgstr ""
+
+#: lib/regcomp.c:156
+msgid "Invalid regular expression"
+msgstr ""
+
+#: lib/regcomp.c:159
+msgid "Invalid collation character"
+msgstr ""
+
+#: lib/regcomp.c:162
+msgid "Invalid character class name"
+msgstr ""
+
+#: lib/regcomp.c:165
+msgid "Trailing backslash"
+msgstr ""
+
+#: lib/regcomp.c:168
+msgid "Invalid back reference"
+msgstr ""
+
+#: lib/regcomp.c:171
+#, fuzzy
+msgid "Unmatched [ or [^"
+msgstr "δƥÅäµÄ¡°{¡±"
+
+#: lib/regcomp.c:174
+#, fuzzy
+msgid "Unmatched ( or \\("
+msgstr "δƥÅäµÄ¡°{¡±"
+
+#: lib/regcomp.c:177
+#, fuzzy
+msgid "Unmatched \\{"
+msgstr "δƥÅäµÄ¡°{¡±"
+
+#: lib/regcomp.c:180
+msgid "Invalid content of \\{\\}"
+msgstr ""
+
+#: lib/regcomp.c:183
+msgid "Invalid range end"
+msgstr ""
+
+#: lib/regcomp.c:186
+msgid "Memory exhausted"
+msgstr ""
+
+#: lib/regcomp.c:189
+msgid "Invalid preceding regular expression"
+msgstr ""
+
+#: lib/regcomp.c:192
+msgid "Premature end of regular expression"
+msgstr ""
+
+#: lib/regcomp.c:195
+msgid "Regular expression too big"
+msgstr ""
+
+#: lib/regcomp.c:198
+#, fuzzy
+msgid "Unmatched ) or \\)"
+msgstr "δƥÅäµÄ¡°{¡±"
+
+#: lib/regcomp.c:672
+msgid "No previous regular expression"
+msgstr ""
diff --git a/sed/Makefile.am b/sed/Makefile.am
new file mode 100644
index 0000000..ec40c20
--- /dev/null
+++ b/sed/Makefile.am
@@ -0,0 +1,18 @@
+## Process this file with automake to produce Makefile.in.  It builds the program `sed' (bin_PROGRAMS) from sed_SOURCES, links it with ../lib/libsed.a plus whatever configure substitutes for @INTLLIBS@, and passes $(localedir) to the C sources as the LOCALEDIR macro through AM_CPPFLAGS.
+bin_PROGRAMS = sed
+
+localedir = $(datadir)/locale
+
+sed_SOURCES = sed.c compile.c execute.c regexp.c fmt.c mbcs.c
+noinst_HEADERS = sed.h
+
+AM_CPPFLAGS = -I$(top_srcdir)/lib -I$(top_srcdir)/intl \
+ -I$(top_srcdir) -I$(top_builddir)/lib \
+ -DLOCALEDIR=\"$(localedir)\"
+
+sed_LDADD = ../lib/libsed.a @INTLLIBS@
+sed_DEPENDENCIES = ../lib/libsed.a
+
+$(PROGRAMS): $(LDADD)
+
+
diff --git a/sed/compile.c b/sed/compile.c
new file mode 100644
index 0000000..869817f
--- /dev/null
+++ b/sed/compile.c
@@ -0,0 +1,1721 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* compile.c: translate sed source into internal form */
+
+#include "sed.h"
+#include "strverscmp.h"
+#include <stdio.h>
+#include <ctype.h>
+
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+# ifdef HAVE_MEMORY_H
+# include <memory.h>
+# endif
+#else
+# include <string.h>
+#endif /* HAVE_STRINGS_H */
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#ifndef EXIT_FAILURE
+# define EXIT_FAILURE 1
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#include <obstack.h>
+
+
+#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
+#define VECTOR_ALLOC_INCREMENT 40
+
+/* let's not confuse text editors that have only dumb bracket-matching... */
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
+#define OPEN_BRACE '{'
+#define CLOSE_BRACE '}'
+
+/* Cursor over the script text currently being compiled. The text comes
+ either from an in-memory string (base/cur/end) or from an open file. */
+struct prog_info {
+ /* When we're reading a script command from a string, `prog.base'
+ points to the first character in the string, `prog.cur' points
+ to the current character in the string, and `prog.end' points
+ just past the last character (inchar() only reads while
+ `prog.cur' < `prog.end'). This allows us to compile script
+ strings that contain nulls. */
+ const unsigned char *base;
+ const unsigned char *cur;
+ const unsigned char *end;
+
+ /* This is the current script file. If it is NULL, we are reading
+ from a string stored at `prog.cur' instead. If both `prog.file'
+ and `prog.cur' are NULL, we're in trouble! */
+ FILE *file;
+};
+
+/* Information used to give out useful and informative error messages.
+ bad_prog() reports `name' and `line' when `name' is set, and
+ `string_expr_count' plus a character offset otherwise. */
+struct error_info {
+ /* This is the name of the current script file. */
+ const char *name;
+
+ /* This is the number of the current script line that we're compiling.
+ inchar() increments it on every newline it returns and savchar()
+ decrements it again when a newline is pushed back. */
+ countT line;
+
+ /* This is the index of the "-e" expressions on the command line. */
+ countT string_expr_count;
+};
+
+
+/* Label structure used to resolve GOTO's, labels, and block beginnings.
+ The same node type is chained into three separate stacks in this
+ file: `jumps', `labels' and `blocks'. Nodes are created by
+ setup_label() and popped by release_label(). */
+struct sed_label {
+ countT v_index; /* index of vector element being referenced */
+ char *name; /* NUL-terminated name of the label */
+ struct error_info err_info; /* track where `{}' blocks start */
+ struct sed_label *next; /* linked list (stack) */
+};
+
+/* Associates a special file name with one of the standard streams.
+ `outf' is the output descriptor handed back by get_openfile();
+ `pfp' points at the variable holding the stream to store in
+ `outf.fp'. */
+struct special_files {
+ struct output outf;
+ FILE **pfp;
+};
+
+/* get_openfile() assigns stdin/stdout/stderr to these at run time and
+ reads them back through `pfp', since the std* streams may not be
+ usable in a static initializer. */
+FILE *my_stdin, *my_stdout, *my_stderr;
+
+/* Table searched by get_openfile(); the entry whose name is NULL
+ terminates it. */
+struct special_files special_files[] = {
+ { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
+ { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
+ { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
+ { { NULL, false, NULL, NULL }, NULL }
+};
+
+
+/* Where we are in the processing of the input. */
+static struct prog_info prog;
+static struct error_info cur_input;
+
+/* Information about labels and jumps-to-labels. This is used to do
+ the required backpatching after we have compiled all the scripts. */
+static struct sed_label *jumps = NULL;
+static struct sed_label *labels = NULL;
+
+/* We wish to detect #n magic only in the first input argument;
+ this flag tracks when we have consumed the first file of input. */
+static bool first_script = true;
+
+/* Allow for scripts like "sed -e 'i\' -e foo": */
+static struct buffer *pending_text = NULL;
+static struct text_buf *old_text_buf = NULL;
+
+/* Information about block start positions. This is used to backpatch
+ block end positions. */
+static struct sed_label *blocks = NULL;
+
+/* Use an obstack for compilation. */
+static struct obstack obs;
+
+/* Various error messages we may want to print.
+
+ All messages live back to back in one character array, separated by
+ NUL bytes. Each macro below is a pointer to the start of one
+ message, computed as the previous message's pointer plus the sizeof
+ of the previous message's literal (sizeof counts the terminating
+ NUL, so the sum lands on the first character of the next message).
+
+ The literal inside each sizeof must therefore be byte-for-byte the
+ same as the corresponding entry in the table, and the macros must
+ stay in the same order as the table; otherwise every later pointer
+ is off. The N_() wrappers presumably only mark the literals for
+ message extraction -- TODO confirm against its definition. */
+static const char errors[] =
+ "multiple `!'s\0"
+ "unexpected `,'\0"
+ "invalid usage of +N or ~N as first address\0"
+ "unmatched `{'\0"
+ "unexpected `}'\0"
+ "extra characters after command\0"
+ "expected \\ after `a', `c' or `i'\0"
+ "`}' doesn't want any addresses\0"
+ ": doesn't want any addresses\0"
+ "comments don't accept any addresses\0"
+ "missing command\0"
+ "command only uses one address\0"
+ "unterminated address regex\0"
+ "unterminated `s' command\0"
+ "unterminated `y' command\0"
+ "unknown option to `s'\0"
+ "multiple `p' options to `s' command\0"
+ "multiple `g' options to `s' command\0"
+ "multiple number options to `s' command\0"
+ "number option to `s' command may not be zero\0"
+ "strings for `y' command are different lengths\0"
+ "delimiter character is not a single-byte character\0"
+ "expected newer version of sed\0"
+ "invalid usage of line address 0\0"
+ "unknown command: `%c'";
+
+#define BAD_BANG (errors)
+#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
+#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
+#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
+#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
+#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
+#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
+#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
+#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
+#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
+#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
+#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
+#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
+#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
+#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
+#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
+#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
+#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
+#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
+#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
+#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
+#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
+#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
+#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
+#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
+/* One past the end of the whole table. */
+#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
+
+static struct output *file_read = NULL;
+static struct output *file_write = NULL;
+
+
+/* Complain about an unknown command and exit.
+
+   CH is the offending command character. It is substituted for the
+   `%c' in the (translated) UNKNOWN_CMD message and the result is
+   handed to bad_prog(), which prints it and terminates the program. */
+void
+bad_command(ch)
+  char ch;
+{
+  const char *msg = _(UNKNOWN_CMD);
+  /* `%c' expands to a single character, so the formatted text is never
+     longer than MSG itself. The extra byte leaves room for the
+     terminating NUL even if a translation were to drop the `%c'. */
+  char *unknown_cmd = xmalloc(strlen(msg) + 1);
+
+  sprintf(unknown_cmd, msg, ch);
+  bad_prog(unknown_cmd);
+}
+
+/* Complain about a programming error and exit. */
+void
+bad_prog(why)
+ const char *why;
+{
+ if (cur_input.name)
+ fprintf(stderr, _("%s: file %s line %lu: %s\n"),
+ myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
+ else
+ fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
+ myname,
+ CAST(unsigned long)cur_input.string_expr_count,
+ CAST(unsigned long)(prog.cur-prog.base),
+ why);
+ exit(EXIT_FAILURE);
+}
+
+
+/* Fetch the next character of the script, from the in-memory string
+   when one is active and from the script file otherwise. Returns EOF
+   once the source is exhausted (or when there is no source at all).
+   Every newline handed out bumps cur_input.line so that diagnostics
+   point at the right line. */
+static int inchar P_((void));
+static int
+inchar()
+{
+  int next;
+
+  if (prog.cur)
+    next = (prog.cur < prog.end) ? *prog.cur++ : EOF;
+  else if (prog.file && !feof(prog.file))
+    next = getc(prog.file);
+  else
+    next = EOF;
+
+  if (next == '\n')
+    ++cur_input.line;
+  return next;
+}
+
+/* Push CH back so that the next inchar() returns it again. EOF is
+   silently ignored. Pushing back a newline undoes the line count
+   increment made by inchar(). For string input the character must be
+   the one just read; anything else is reported through panic(). */
+static void savchar P_((int ch));
+static void
+savchar(ch)
+  int ch;
+{
+  if (ch != EOF)
+    {
+      if (cur_input.line > 0 && ch == '\n')
+        --cur_input.line;
+
+      if (!prog.cur)
+        ungetc(ch, prog.file);
+      else if (prog.cur <= prog.base || *--prog.cur != ch)
+        panic("Called savchar() with unexpected pushback (%x)",
+              CAST(unsigned char)ch);
+    }
+}
+
+/* Skip over blanks (as classified by ISBLANK) and return the first
+   character that is not one; this may be EOF. */
+static int in_nonblank P_((void));
+static int
+in_nonblank()
+{
+  int ch = inchar();
+
+  while (ISBLANK(ch))
+    ch = inchar();
+  return ch;
+}
+
+/* Parse an unsigned decimal number from the script. CH is the first
+   character, already read by the caller; further digits are pulled
+   with inchar(). The first non-digit is pushed back. Returns 0 when
+   CH is not a digit. */
+static countT in_integer P_((int ch));
+static countT
+in_integer(ch)
+  int ch;
+{
+  countT value;
+
+  for (value = 0; ISDIGIT(ch); ch = inchar())
+    value = value * 10 + ch - '0';
+
+  savchar(ch);
+  return value;
+}
+
+/* Append CH to buffer B, then read and return the following script
+   character. */
+static int add_then_next P_((struct buffer *b, int ch));
+static int
+add_then_next(b, ch)
+  struct buffer *b;
+  int ch;
+{
+  int following;
+
+  add1_buffer(b, ch);
+  following = inchar();
+  return following;
+}
+
+/* Convert up to MAXDIGITS digits of radix BASE found in [BUF, BUFEND)
+   into a single character stored in *RESULT. Digits are 0-9 and a-f
+   in either case; scanning stops at the first character that is not a
+   digit valid for BASE. If no digit was consumed, *RESULT is set to
+   DEFAULT_CHAR instead. Returns a pointer to the first unconsumed
+   character. */
+static char * convert_number P_((char *, char *, const char *, int, int, int));
+static char *
+convert_number(result, buf, bufend, base, maxdigits, default_char)
+  char *result;
+  char *buf;
+  const char *bufend;
+  int base;
+  int maxdigits;
+  int default_char;
+{
+  int value = 0;
+  char *p = buf;
+
+  while (p < bufend && maxdigits-- > 0)
+    {
+      int c = *p;
+      int digit;
+
+      if (c >= '0' && c <= '9')
+        digit = c - '0';
+      else if (c >= 'a' && c <= 'f')
+        digit = c - 'a' + 0xa;
+      else if (c >= 'A' && c <= 'F')
+        digit = c - 'A' + 0xa;
+      else
+        digit = -1;
+
+      /* Not a digit at all, or not a digit in this radix. */
+      if (digit < 0 || digit >= base)
+        break;
+
+      value = value * base + digit;
+      ++p;
+    }
+
+  *result = (p == buf) ? default_char : value;
+  return p;
+}
+
+
+/* Collect the file name argument of an `r', `w' or `s///w' command:
+   leading blanks are skipped and everything up to (not including) the
+   next newline or the end of the script is taken. The result is a
+   new NUL-terminated buffer that the caller must release. */
+static struct buffer *read_filename P_((void));
+static struct buffer *
+read_filename()
+{
+  struct buffer *name = init_buffer();
+  int ch;
+
+  for (ch = in_nonblank(); ch != EOF && ch != '\n'; ch = add_then_next(name, ch))
+    {
+#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
+      if (posixicity == POSIXLY_EXTENDED)
+        if (ch == ';' || ch == '#')
+          {
+            savchar(ch);
+            break;
+          }
+#endif
+    }
+
+  add1_buffer(name, '\0');
+  return name;
+}
+
+/* Read a file name from the script and return the output descriptor
+   for it.
+
+   FILE_PTRS is the head of the list of files already opened for this
+   purpose. A name seen before yields the existing descriptor, so each
+   file is opened only once; a new name is opened with MODE and pushed
+   on the front of the list. FAIL is passed straight through to
+   ck_fopen() (presumably whether a failure to open is fatal -- TODO
+   confirm against ck_fopen).
+
+   When running in POSIXLY_EXTENDED mode the names listed in the
+   special_files table are mapped onto the standard streams instead of
+   being opened, and are never added to the list. */
+static struct output *get_openfile P_((struct output **file_ptrs, char *mode, bool fail));
+static struct output *
+get_openfile(file_ptrs, mode, fail)
+  struct output **file_ptrs;
+  char *mode;
+  bool fail;
+{
+  struct buffer *b;
+  char *file_name;
+  struct output *p;
+
+  b = read_filename();
+  file_name = get_buffer(b);
+
+  if (posixicity == POSIXLY_EXTENDED)
+    {
+      /* Check whether it is a special file (stdin, stdout or stderr) */
+      struct special_files *special;
+
+      /* std* sometimes are not constants, so they
+         cannot be used in the initializer for special_files */
+      my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
+      for (special = special_files; special->outf.name; special++)
+        if (strcmp(special->outf.name, file_name) == 0)
+          {
+            special->outf.fp = *special->pfp;
+            free_buffer (b);
+            return &special->outf;
+          }
+    }
+
+  /* Reuse the descriptor if this file has been named before. */
+  for (p = *file_ptrs; p; p = p->link)
+    if (strcmp(p->name, file_name) == 0)
+      break;
+
+  if (!p)
+    {
+      p = OB_MALLOC(&obs, 1, struct output);
+      p->name = ck_strdup(file_name);
+      p->fp = ck_fopen(p->name, mode, fail);
+      p->missing_newline = false;
+      p->link = *file_ptrs;
+      *file_ptrs = p;
+    }
+  free_buffer(b);
+  return p;
+}
+
+
+/* Return the slot for the next command in *VECTORP, growing the
+   command array by VECTOR_ALLOC_INCREMENT entries when it is full.
+   The slot is reset to "no addresses, inactive range, no negation, no
+   command". v_length is NOT advanced here; the caller does that once
+   the command has been compiled. */
+static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
+static struct sed_cmd *
+next_cmd_entry(vectorp)
+  struct vector **vectorp;
+{
+  struct vector *vec = *vectorp;
+  struct sed_cmd *slot;
+
+  if (vec->v_allocated == vec->v_length)
+    {
+      vec->v_allocated += VECTOR_ALLOC_INCREMENT;
+      vec->v = REALLOC(vec->v, vec->v_allocated, struct sed_cmd);
+    }
+
+  slot = &vec->v[vec->v_length];
+  slot->cmd = '\0'; /* something invalid, to catch bugs early */
+  slot->addr_bang = false;
+  slot->range_state = RANGE_INACTIVE;
+  slot->a2 = NULL;
+  slot->a1 = NULL;
+
+  *vectorp = vec;
+  return slot;
+}
+
+/* Copy the inside of a bracket expression from the script into B.
+
+   The caller has already consumed and stored the opening `['. The
+   characters of the expression are appended to B and the character
+   that ended the scan is returned WITHOUT being stored: the closing
+   `]' on success, or a newline / EOF if the expression is
+   unterminated.
+
+   CUR_STAT is the caller's multibyte state; BRLEN is consulted once
+   per character and any character for which it does not return 1 is
+   copied without being interpreted.
+
+   A `]' that comes first in the list -- directly after the `[' or
+   after a leading `^' -- is an ordinary list member, as POSIX
+   specifies for bracket expressions. */
+static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
+static int
+snarf_char_class(b, cur_stat)
+  struct buffer *b;
+  mbstate_t *cur_stat;
+{
+  int ch;
+  int state = 0;
+  /* Opening delimiter of the current [. .], [: :] or [= =] element.
+     Initialised to a value no delimiter can equal, because state 2 can
+     also be reached through OPEN_BRACKET without DELIM being set. */
+  int delim = 0;
+  bool pending_mb = false;
+
+  ch = inchar();
+  if (ch == '^')
+    ch = add_then_next(b, ch);
+  /* Not `else if': the `]' may follow the `^' and is still literal. */
+  if (ch == CLOSE_BRACKET)
+    ch = add_then_next(b, ch);
+
+  /* States are:
+        0 outside a collation element, character class or collation class
+        1 after the bracket
+        2 after the opening ./:/=
+        3 after the closing ./:/= */
+
+  for (;; ch = add_then_next (b, ch))
+    {
+      pending_mb = BRLEN (ch, cur_stat) != 1;
+
+      switch (ch)
+        {
+        case EOF:
+        case '\n':
+          return ch;
+
+        case '.':
+        case ':':
+        case '=':
+          if (pending_mb)
+            continue;
+
+          if (state == 1)
+            {
+              delim = ch;
+              state++;
+            }
+          else if (state == 2 && ch == delim)
+            state++;
+          else
+            break;
+
+          continue;
+
+        case OPEN_BRACKET:
+          if (pending_mb)
+            continue;
+
+          state++;
+          continue;
+
+        case CLOSE_BRACKET:
+          if (pending_mb)
+            continue;
+
+          if (state == 0 || state == 1)
+            return ch;
+          else if (state == 3)
+            state = 0;
+
+          break;
+
+        default:
+          break;
+        }
+
+      /* Getting a character different from .=: whilst in state 1
+         goes back to state 0, getting a character different from ]
+         whilst in state 3 goes back to state 2. */
+      state &= ~1;
+    }
+}
+
+/* Read text up to the next unescaped occurrence of the delimiter SLASH
+   and return it in a newly created buffer; the delimiter itself is
+   consumed but not stored. Returns NULL (after releasing the buffer)
+   if a newline or the end of the script is reached first; a newline
+   is pushed back so that the error is reported on the right line.
+
+   Backslash handling: `\' followed by a newline or by SLASH stores
+   just that character; when REGEX is true and REG_PERL is not defined,
+   `\n' is stored as a newline; any other escaped character is stored
+   together with its backslash. When REGEX is true, a bracket
+   expression is copied through snarf_char_class() so that a SLASH
+   inside `[...]' does not end the text.
+
+   BRLEN is called exactly once for the delimiter and once for each
+   character read, since it is handed the conversion state and a second
+   call for the same byte would presumably advance that state twice --
+   NOTE(review): confirm against the definition of BRLEN. */
+static struct buffer *match_slash P_((int slash, bool regex));
+static struct buffer *
+match_slash(slash, regex)
+  int slash;
+  bool regex;
+{
+  struct buffer *b;
+  int ch;
+  bool pending_mb = false;
+  mbstate_t cur_stat;
+
+  memset (&cur_stat, 0, sizeof (mbstate_t));
+
+  /* The delimiter must be a complete character on its own. */
+  if (BRLEN (slash, &cur_stat) == -2)
+    bad_prog (_(BAD_DELIM));
+
+  /* Start the scan of the text itself from the initial state. */
+  memset (&cur_stat, 0, sizeof (mbstate_t));
+
+  b = init_buffer();
+  while ((ch = inchar()) != EOF && ch != '\n')
+    {
+      pending_mb = BRLEN (ch, &cur_stat) != 1;
+
+      if (!pending_mb)
+        {
+          if (ch == slash)
+            return b;
+          else if (ch == '\\')
+            {
+              ch = inchar();
+              if (ch == EOF)
+                break;
+#ifndef REG_PERL
+              else if (ch == 'n' && regex)
+                ch = '\n';
+#endif
+              else if (ch != '\n' && ch != slash)
+                add1_buffer(b, '\\');
+            }
+          else if (ch == OPEN_BRACKET && regex)
+            {
+              add1_buffer(b, ch);
+              ch = snarf_char_class(b, &cur_stat);
+              if (ch != CLOSE_BRACKET)
+                break;
+            }
+        }
+
+      add1_buffer(b, ch);
+    }
+
+  if (ch == '\n')
+    savchar(ch); /* for proper line number in error report */
+  free_buffer(b);
+  return NULL;
+}
+
+/* Parse the option letters that follow the replacement of an `s'
+ command. Options that affect execution are recorded in *CMD (which
+ is reset first); options that affect how the regex is compiled are
+ accumulated and returned as a flag word. Parsing stops at the end
+ of the command; an unknown option, or a repeated `p', `g' or
+ number, is reported through bad_prog(). */
+static int mark_subst_opts P_((struct subst *cmd));
+static int
+mark_subst_opts(cmd)
+ struct subst *cmd;
+{
+ int flags = 0;
+ int ch;
+
+ cmd->global = false;
+ cmd->print = false;
+ cmd->eval = false;
+ cmd->numb = 0;
+ cmd->outf = NULL;
+
+ for (;;)
+ switch ( (ch = in_nonblank()) )
+ {
+ case 'i': /* GNU extension */
+ case 'I': /* GNU extension */
+ flags |= REG_ICASE;
+ break;
+
+#ifdef REG_PERL
+ case 's': /* GNU extension */
+ case 'S': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_DOTALL;
+ break;
+
+ case 'x': /* GNU extension */
+ case 'X': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_EXTENDED;
+ break;
+#endif
+
+ case 'm': /* GNU extension */
+ case 'M': /* GNU extension */
+ flags |= REG_NEWLINE;
+ break;
+
+ case 'e':
+ cmd->eval = true;
+ break;
+
+ case 'p':
+ if (cmd->print)
+ bad_prog(_(EXCESS_P_OPT));
+ /* The value depends on whether `e' has already been seen. */
+ cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
+ break;
+
+ case 'g':
+ if (cmd->global)
+ bad_prog(_(EXCESS_G_OPT));
+ cmd->global = true;
+ break;
+
+ case 'w':
+ /* The file name runs to the end of the line, so nothing can
+ follow it: this is necessarily the last option. */
+ cmd->outf = get_openfile(&file_write, "w", true);
+ return flags;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (cmd->numb)
+ bad_prog(_(EXCESS_N_OPT));
+ cmd->numb = in_integer(ch);
+ if (!cmd->numb)
+ bad_prog(_(ZERO_N_OPT));
+ break;
+
+ case CLOSE_BRACE:
+ case '#':
+ /* These end the command but must still be seen by the caller. */
+ savchar(ch);
+ /* Fall Through */
+ case EOF:
+ case '\n':
+ case ';':
+ return flags;
+
+ case '\r':
+ /* Accept a CR-LF line ending; a lone CR is an unknown option. */
+ if (inchar() == '\n')
+ return flags;
+ /* FALLTHROUGH */
+
+ default:
+ bad_prog(_(UNKNOWN_S_OPT));
+ /*NOTREACHED*/
+ }
+}
+
+
+/* Read the label argument of a `:', `b' or `t' command. Leading
+   blanks are skipped; the label ends at a blank, newline, `;', `}',
+   `#' or the end of the script, and that terminator is pushed back.
+   Returns a freshly allocated NUL-terminated copy (possibly empty)
+   that the caller owns. */
+static char *read_label P_((void));
+static char *
+read_label()
+{
+  struct buffer *text = init_buffer();
+  char *label;
+  int ch;
+
+  for (ch = in_nonblank(); ; ch = add_then_next (text, ch))
+    if (ch == EOF || ch == '\n'
+        || ISBLANK(ch) || ch == ';' || ch == CLOSE_BRACE || ch == '#')
+      break;
+
+  savchar(ch);
+  add1_buffer(text, '\0');
+  label = ck_strdup(get_buffer(text));
+  free_buffer(text);
+  return label;
+}
+
+/* Push a new entry on the front of LIST and return the new head.
+
+   Used for labels (`:'), label references (`b', `t', `T') that are
+   backpatched once compilation is complete, and for `{' block starts
+   that are backpatched when the matching `}' is found. IDX is the
+   index of the command concerned, NAME the label text (may be NULL)
+   and ERR_INFO, when non-NULL, is copied into the entry so the
+   position can be reported later. The entry lives on the obstack. */
+static struct sed_label *setup_label
+  P_((struct sed_label *, countT, char *, const struct error_info *));
+static struct sed_label *
+setup_label(list, idx, name, err_info)
+  struct sed_label *list;
+  countT idx;
+  char *name;
+  const struct error_info *err_info;
+{
+  struct sed_label *node = OB_MALLOC(&obs, 1, struct sed_label);
+
+  node->next = list;
+  node->name = name;
+  node->v_index = idx;
+  if (err_info)
+    MEMCPY(&node->err_info, err_info, sizeof (node->err_info));
+
+  return node;
+}
+
+/* Pop the first entry off a label list: release its name and return
+   the rest of the list (NULL for an empty list). The entry itself is
+   not freed because it was allocated on the obstack. */
+static struct sed_label *release_label P_((struct sed_label *list_head));
+static struct sed_label *
+release_label(list_head)
+  struct sed_label *list_head;
+{
+  struct sed_label *rest = NULL;
+
+  if (list_head)
+    {
+      rest = list_head->next;
+      FREE(list_head->name);
+    }
+
+  return rest;
+}
+
+/* Allocate (on the obstack) one piece of an `s' command replacement:
+   LENGTH bytes of literal TEXT to be emitted with case conversion
+   TYPE. The piece starts out referring to no match group
+   (subst_id == -1). The `next' link is left unset; the caller is
+   responsible for chaining and terminating the list. */
+static struct replacement *new_replacement P_((char *, size_t,
+                                               enum replacement_types));
+static struct replacement *
+new_replacement(text, length, type)
+  char *text;
+  size_t length;
+  enum replacement_types type;
+{
+  struct replacement *piece = OB_MALLOC(&obs, 1, struct replacement);
+
+  piece->repl_type = type;
+  piece->subst_id = -1;
+  piece->prefix_length = length;
+  piece->prefix = text;
+
+  return piece;
+}
+
+/* Compile the replacement part of an `s' command.
+
+ TEXT/LENGTH is the raw replacement as read from the script. It is
+ duplicated, run through normalize_text(), and then split into a
+ linked list of `struct replacement' pieces stored in
+ SUB->replacement. Each piece is a run of literal text optionally
+ followed by a reference to a match group: `&' is group 0 and `\N'
+ (N a digit) is group N. SUB->max_id receives the highest group
+ number referenced. The case-conversion escapes \L \U \E \l \u
+ are recognised unless running in POSIXLY_BASIC mode. */
+static void setup_replacement P_((struct subst *, const char *, size_t));
+static void
+setup_replacement(sub, text, length)
+ struct subst *sub;
+ const char *text;
+ size_t length;
+{
+ char *base;
+ char *p;
+ char *text_end;
+ /* repl_type is the conversion for the piece being collected;
+ save_type is what repl_type reverts to once that piece is emitted
+ (this is how the one-shot \l and \u are undone). */
+ enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
+ /* Dummy list head; only its `next' member is used. */
+ struct replacement root;
+ struct replacement *tail;
+
+ sub->max_id = 0;
+ base = MEMDUP(text, length, char);
+ length = normalize_text(base, length, TEXT_REPLACEMENT);
+
+ text_end = base + length;
+ tail = &root;
+
+ /* `base' always points at the start of the literal text that has not
+ yet been attached to a piece. */
+ for (p=base; p<text_end; ++p)
+ {
+ if (*p == '\\')
+ {
+ /* Preceding the backslash may be some literal text: */
+ tail = tail->next =
+ new_replacement(base, CAST(size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+
+ /* Skip the backslash and look for a numeric back-reference,
+ or a case-munging escape if not in POSIX mode: */
+ ++p;
+ if (p < text_end && (posixicity != POSIXLY_BASIC || ISDIGIT (*p)))
+ switch (*p)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ tail->subst_id = *p - '0';
+ if (sub->max_id < tail->subst_id)
+ sub->max_id = tail->subst_id;
+ break;
+
+ case 'L':
+ repl_type = REPL_LOWERCASE;
+ save_type = REPL_LOWERCASE;
+ break;
+
+ case 'U':
+ repl_type = REPL_UPPERCASE;
+ save_type = REPL_UPPERCASE;
+ break;
+
+ case 'E':
+ repl_type = REPL_ASIS;
+ save_type = REPL_ASIS;
+ break;
+
+ case 'l':
+ save_type = repl_type;
+ repl_type |= REPL_LOWERCASE_FIRST;
+ break;
+
+ case 'u':
+ save_type = repl_type;
+ repl_type |= REPL_UPPERCASE_FIRST;
+ break;
+
+ default:
+ /* Any other escaped character stands for itself: overwrite
+ the backslash with it and extend the piece just created
+ by one byte so that it is included. */
+ p[-1] = *p;
+ ++tail->prefix_length;
+ }
+
+ base = p + 1;
+ }
+ else if (*p == '&')
+ {
+ /* Preceding the ampersand may be some literal text: */
+ tail = tail->next =
+ new_replacement(base, CAST(size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+ tail->subst_id = 0;
+ base = p + 1;
+ }
+ }
+ /* There may be some trailing literal text: */
+ if (base < text_end)
+ tail = tail->next =
+ new_replacement(base, CAST(size_t)(text_end - base), repl_type);
+
+ /* Terminate the list; for an empty replacement this sets root.next
+ itself, so SUB->replacement becomes NULL. */
+ tail->next = NULL;
+ sub->replacement = root.next;
+}
+
+/* Read the text argument of an `a', `i', `c' or `e' command.
+
+ BUF is the command's text buffer to fill, or NULL to continue a text
+ left unfinished by the previous script fragment (the
+ "sed -e 'i\' -e foo" case), in which case the buffer remembered in
+ old_text_buf is filled. LEADIN_CH is the first character of the
+ text as already read by the caller: EOF means the text will come
+ from a later fragment, and a newline is not stored.
+
+ The text extends to the first newline not preceded by a backslash.
+ If the script ends first, the partial text is kept in pending_text
+ and the function returns without filling the buffer. */
+static void read_text P_((struct text_buf *buf, int leadin_ch));
+static void
+read_text(buf, leadin_ch)
+ struct text_buf *buf;
+ int leadin_ch;
+{
+ int ch;
+
+ /* Should we start afresh (as opposed to continue a partial text)? */
+ if (buf)
+ {
+ if (pending_text)
+ free_buffer(pending_text);
+ pending_text = init_buffer();
+ buf->text = NULL;
+ buf->text_length = 0;
+ old_text_buf = buf;
+ }
+ /* assert(old_text_buf != NULL); */
+
+ if (leadin_ch == EOF)
+ return;
+
+ if (leadin_ch != '\n')
+ add1_buffer(pending_text, leadin_ch);
+
+ ch = inchar();
+ while (ch != EOF && ch != '\n')
+ {
+ if (ch == '\\')
+ {
+ /* Keep the backslash together with the character it escapes;
+ a backslash that is the very last character is dropped. */
+ ch = inchar();
+ if (ch != EOF)
+ add1_buffer (pending_text, '\\');
+ }
+
+ if (ch == EOF)
+ {
+ /* Script fragment ended in mid-text: leave pending_text set so
+ that a later call with BUF == NULL can continue it. */
+ add1_buffer (pending_text, '\n');
+ return;
+ }
+
+ ch = add_then_next (pending_text, ch);
+ }
+
+ /* The text is complete: normalize it and hand a copy to the command. */
+ add1_buffer(pending_text, '\n');
+ if (!buf)
+ buf = old_text_buf;
+ buf->text_length = normalize_text (get_buffer (pending_text),
+ size_buffer (pending_text), TEXT_BUFFER);
+ buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
+ free_buffer(pending_text);
+ pending_text = NULL;
+}
+
+
+/* Try to read an address for a sed command. If it succeeds,
+ return non-zero and store the resulting address in `*addr'.
+ If the input doesn't look like an address read nothing
+ and return zero.
+
+ CH is the first character of the candidate address, already read by
+ the caller. Recognised forms: /regex/ and \cregexc (optionally
+ followed by flag letters), NUMBER, FIRST~STEP, +N, ~N and `$'.
+ `*addr' is reset to a null address before anything is parsed. */
+static bool compile_address P_((struct addr *addr, int ch));
+static bool
+compile_address(addr, ch)
+ struct addr *addr;
+ int ch;
+{
+ addr->addr_type = ADDR_IS_NULL;
+ addr->addr_step = 0;
+ addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
+ addr->addr_regex = NULL;
+
+ if (ch == '/' || ch == '\\')
+ {
+ int flags = 0;
+ struct buffer *b;
+ addr->addr_type = ADDR_IS_REGEX;
+ /* After a backslash the next character is the delimiter. */
+ if (ch == '\\')
+ ch = inchar();
+ if ( !(b = match_slash(ch, true)) )
+ bad_prog(_(UNTERM_ADDR_RE));
+
+ /* Collect flag letters; the first character that is not a flag is
+ pushed back and ends the address. */
+ for(;;)
+ {
+ ch = in_nonblank();
+ switch(ch)
+ {
+ case 'I': /* GNU extension */
+ flags |= REG_ICASE;
+ break;
+
+#ifdef REG_PERL
+ case 'S': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_DOTALL;
+ break;
+
+ case 'X': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_EXTENDED;
+ break;
+#endif
+
+ case 'M': /* GNU extension */
+ flags |= REG_NEWLINE;
+ break;
+
+ default:
+ savchar (ch);
+ addr->addr_regex = compile_regex (b, flags, 0);
+ free_buffer(b);
+ return true;
+ }
+ }
+ }
+ else if (ISDIGIT(ch))
+ {
+ addr->addr_number = in_integer(ch);
+ addr->addr_type = ADDR_IS_NUM;
+ ch = in_nonblank();
+ if (ch != '~')
+ {
+ savchar(ch);
+ }
+ else
+ {
+ /* FIRST~STEP; a step of zero leaves a plain line number. */
+ countT step = in_integer(in_nonblank());
+ if (step > 0)
+ {
+ addr->addr_step = step;
+ addr->addr_type = ADDR_IS_NUM_MOD;
+ }
+ }
+ }
+ else if (ch == '+' || ch == '~')
+ {
+ addr->addr_step = in_integer(in_nonblank());
+ if (addr->addr_step==0)
+ ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
+ else if (ch == '+')
+ addr->addr_type = ADDR_IS_STEP;
+ else
+ addr->addr_type = ADDR_IS_STEP_MOD;
+ }
+ else if (ch == '$')
+ {
+ addr->addr_type = ADDR_IS_LAST;
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* Read a program (or a subprogram within `{' `}' pairs) in and store
+ the compiled form in `*vector'. Return a pointer to the new vector.
+
+ VECTOR is the command vector to append to, or NULL to start a new
+ one (which also initialises the compilation obstack). The script
+ is read through inchar() from the source described by `prog'. Each
+ command is parsed as: optional address or address pair, optional
+ `!', command character, command-specific arguments. Syntax errors
+ are reported through bad_prog()/bad_command(), which do not return.
+ Labels, jumps and block starts are only recorded here, on the
+ `labels', `jumps' and `blocks' lists. */
+static struct vector *compile_program P_((struct vector *));
+static struct vector *
+compile_program(vector)
+ struct vector *vector;
+{
+ struct sed_cmd *cur_cmd;
+ struct buffer *b;
+ int ch;
+
+ if (!vector)
+ {
+ vector = MALLOC(1, struct vector);
+ vector->v = NULL;
+ vector->v_allocated = 0;
+ vector->v_length = 0;
+
+ obstack_init (&obs);
+ }
+ /* Finish an `a', `i' or `c' text left open by the previous fragment. */
+ if (pending_text)
+ read_text(NULL, '\n');
+
+ for (;;)
+ {
+ struct addr a;
+
+ while ((ch=inchar()) == ';' || ISSPACE(ch))
+ ;
+ if (ch == EOF)
+ break;
+
+ cur_cmd = next_cmd_entry(&vector);
+ if (compile_address(&a, ch))
+ {
+ if (a.addr_type == ADDR_IS_STEP
+ || a.addr_type == ADDR_IS_STEP_MOD)
+ bad_prog(_(BAD_STEP));
+
+ cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
+ ch = in_nonblank();
+ if (ch == ',')
+ {
+ if (!compile_address(&a, in_nonblank()))
+ bad_prog(_(BAD_COMMA));
+
+ cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
+ ch = in_nonblank();
+ }
+
+ /* Line 0 is only accepted as the start of a `0,/regex/' range. */
+ if (cur_cmd->a1->addr_type == ADDR_IS_NUM
+ && cur_cmd->a1->addr_number == 0
+ && (!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX))
+ bad_prog(_(INVALID_LINE_0));
+ }
+ if (ch == '!')
+ {
+ cur_cmd->addr_bang = true;
+ ch = in_nonblank();
+ if (ch == '!')
+ bad_prog(_(BAD_BANG));
+ }
+
+ /* Do not accept extended commands in --posix mode. Also,
+ a few commands only accept one address in that mode. */
+ if (posixicity == POSIXLY_BASIC)
+ switch (ch)
+ {
+ case 'v': case 'L': case 'Q': case 'T':
+ case 'R': case 'W':
+ bad_command(ch);
+
+ case 'a': case 'i': case 'l':
+ case '=': case 'r':
+ if (cur_cmd->a2)
+ bad_prog(_(ONE_ADDR));
+ }
+
+ cur_cmd->cmd = ch;
+ switch (ch)
+ {
+ case '#':
+ if (cur_cmd->a1)
+ bad_prog(_(NO_SHARP_ADDR));
+ ch = inchar();
+ /* `#n' as the first two characters of the first script acts
+ like the -n option. */
+ if (ch=='n' && first_script && cur_input.line < 2)
+ if ( (prog.base && prog.cur==2+prog.base)
+ || (prog.file && !prog.base && 2==ftell(prog.file)))
+ no_default_output = true;
+ while (ch != EOF && ch != '\n')
+ ch = inchar();
+ continue; /* restart the for (;;) loop */
+
+ case 'v':
+ /* This is an extension. Programs needing GNU sed might start
+ * with a `v' command so that other seds will stop.
+ * We compare the version and ignore POSIXLY_CORRECT.
+ */
+ {
+ char *version = read_label ();
+ char *compared_version;
+ compared_version = (*version == '\0') ? "4.0" : version;
+ if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
+ bad_prog(_(ANCIENT_VERSION));
+
+ free (version);
+ posixicity = POSIXLY_EXTENDED;
+ }
+ continue;
+
+ case '{':
+ blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
+ cur_cmd->addr_bang = !cur_cmd->addr_bang;
+ break;
+
+ case '}':
+ if (!blocks)
+ bad_prog(_(EXCESS_CLOSE_BRACE));
+ if (cur_cmd->a1)
+ bad_prog(_(NO_CLOSE_BRACE_ADDR));
+ ch = in_nonblank();
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ /* Backpatch the matching `{' with the index of this `}'. */
+ vector->v[blocks->v_index].x.jump_index = vector->v_length;
+ blocks = release_label(blocks); /* done with this entry */
+ break;
+
+ case 'e':
+ ch = in_nonblank();
+ if (ch == EOF || ch == '\n')
+ {
+ cur_cmd->x.cmd_txt.text_length = 0;
+ break;
+ }
+ else
+ goto read_text_to_slash;
+
+ case 'a':
+ case 'i':
+ case 'c':
+ ch = in_nonblank();
+
+ read_text_to_slash:
+ if (ch == EOF)
+ bad_prog(_(EXPECTED_SLASH));
+
+ if (ch == '\\')
+ ch = inchar();
+ else
+ {
+ /* One-line form: the text starts right here. */
+ savchar(ch);
+ ch = '\n';
+ }
+
+ read_text(&cur_cmd->x.cmd_txt, ch);
+ break;
+
+ case ':':
+ if (cur_cmd->a1)
+ bad_prog(_(NO_COLON_ADDR));
+ labels = setup_label(labels, vector->v_length, read_label(), NULL);
+ break;
+
+ case 'T':
+ case 'b':
+ case 't':
+ jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
+ break;
+
+ case 'Q':
+ case 'q':
+ if (cur_cmd->a2)
+ bad_prog(_(ONE_ADDR));
+ /* Fall through */
+
+ case 'L':
+ case 'l':
+ /* Optional numeric argument; -1 records that none was given. */
+ ch = in_nonblank();
+ if (ISDIGIT(ch))
+ {
+ cur_cmd->x.int_arg = in_integer(ch);
+ ch = in_nonblank();
+ }
+ else
+ cur_cmd->x.int_arg = -1;
+
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ break;
+
+ case '=':
+ case 'd':
+ case 'D':
+ case 'g':
+ case 'G':
+ case 'h':
+ case 'H':
+ case 'n':
+ case 'N':
+ case 'p':
+ case 'P':
+ case 'x':
+ ch = in_nonblank();
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+ break;
+
+ case 'r':
+ b = read_filename();
+ cur_cmd->x.fname = ck_strdup(get_buffer(b));
+ free_buffer(b);
+ break;
+
+ case 'R':
+ cur_cmd->x.fp = get_openfile(&file_read, "r", false)->fp;
+ break;
+
+ case 'W':
+ case 'w':
+ cur_cmd->x.outf = get_openfile(&file_write, "w", true);
+ break;
+
+ case 's':
+ {
+ struct buffer *b2;
+ int flags;
+ int slash;
+
+ slash = inchar();
+ if ( !(b = match_slash(slash, true)) )
+ bad_prog(_(UNTERM_S_CMD));
+ if ( !(b2 = match_slash(slash, false)) )
+ bad_prog(_(UNTERM_S_CMD));
+
+ cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
+ setup_replacement(cur_cmd->x.cmd_subst,
+ get_buffer(b2), size_buffer(b2));
+ free_buffer(b2);
+
+ /* The regex is compiled last because the options that follow
+ the replacement supply its flags. */
+ flags = mark_subst_opts(cur_cmd->x.cmd_subst);
+ cur_cmd->x.cmd_subst->regx =
+ compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id);
+ free_buffer(b);
+ }
+ break;
+
+ case 'y':
+ {
+ size_t len, dest_len;
+ int slash;
+ struct buffer *b2;
+ char *src_buf, *dest_buf;
+
+ slash = inchar();
+ if ( !(b = match_slash(slash, true)) )
+ bad_prog(_(UNTERM_Y_CMD));
+ src_buf = get_buffer(b);
+ len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
+
+ if ( !(b2 = match_slash(slash, true)) )
+ bad_prog(_(UNTERM_Y_CMD));
+ dest_buf = get_buffer(b2);
+ dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
+
+ if (mb_cur_max > 1)
+ {
+ int i, j, idx, src_char_num;
+ /* Scratch array: byte length of each source character. */
+ size_t *src_lens = MALLOC(len, size_t);
+ char **trans_pairs;
+ size_t mbclen;
+ mbstate_t cur_stat;
+
+ /* Enumerate how many character the source buffer has. */
+ memset(&cur_stat, 0, sizeof(mbstate_t));
+ for (i = 0, j = 0; i < len;)
+ {
+ mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+ src_lens[j++] = mbclen;
+ i += mbclen;
+ }
+ src_char_num = j;
+
+ memset(&cur_stat, 0, sizeof(mbstate_t));
+ idx = 0;
+
+ /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
+ src(i) : pointer to i-th source character.
+ dest(i) : pointer to i-th destination character.
+ NULL : terminator */
+ trans_pairs = MALLOC(2 * src_char_num + 1, char*);
+ cur_cmd->x.translatemb = trans_pairs;
+ for (i = 0; i < src_char_num; i++)
+ {
+ if (idx >= dest_len)
+ bad_prog(_(Y_CMD_LEN));
+
+ /* Set the i-th source character. */
+ trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
+ strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
+ trans_pairs[2 * i][src_lens[i]] = '\0';
+ src_buf += src_lens[i]; /* Forward to next character. */
+
+ /* Fetch the i-th destination character. */
+ mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+
+ /* Set the i-th destination character. */
+ trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
+ strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
+ trans_pairs[2 * i + 1][mbclen] = '\0';
+ idx += mbclen; /* Forward to next character. */
+ }
+ trans_pairs[2 * i] = NULL;
+ /* The per-character lengths are not needed past this point. */
+ FREE(src_lens);
+ if (idx != dest_len)
+ bad_prog(_(Y_CMD_LEN));
+ }
+ else
+ {
+ char *translate = OB_MALLOC(&obs, YMAP_LENGTH, char);
+ unsigned char *ustring = CAST(unsigned char *)src_buf;
+
+ if (len != dest_len)
+ bad_prog(_(Y_CMD_LEN));
+
+ /* Start from the identity mapping, then overlay the pairs. */
+ for (len = 0; len < YMAP_LENGTH; len++)
+ translate[len] = len;
+
+ while (dest_len--)
+ translate[(unsigned char)*ustring++] = *dest_buf++;
+
+ cur_cmd->x.translate = translate;
+ }
+
+ if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ free_buffer(b);
+ free_buffer(b2);
+ }
+ break;
+
+ case EOF:
+ bad_prog(_(NO_COMMAND));
+ /*NOTREACHED*/
+
+ default:
+ bad_command (ch);
+ /*NOTREACHED*/
+ }
+
+ /* this is buried down here so that "continue" statements will miss it */
+ ++vector->v_length;
+ }
+ return vector;
+}
+
+
+/* Deal with \X escapes.
+
+   Rewrite the LEN bytes at BUF in place.  The escapes \a \f \n \r \t \v
+   and backslash-newline become the corresponding control byte; \cX
+   becomes toupper(X) ^ 0x40; \dNNN, \oNNN and \xHH (and, when REG_PERL
+   is in effect, \NNN) are handed to convert_number(), which is expected
+   to store a single byte at the write position.  Any other escape is
+   left for the caller to interpret: its backslash is kept unless BUFTYPE
+   is TEXT_BUFFER.  Characters that MBRLEN reports as multibyte are
+   copied through untouched.  For a TEXT_REGEX buffer, when not in
+   POSIXLY_EXTENDED mode, escapes inside a bracket expression are not
+   translated.
+
+   Returns the length of the rewritten text.  */
+size_t
+normalize_text(buf, len, buftype)
+  char *buf;
+  size_t len;
+  enum text_types buftype;
+{
+  const char *bufend = buf + len;
+  char *p = buf;		/* read position */
+  char *q = buf;		/* write position */
+
+  /* This variable prevents normalizing text within bracket
+     subexpressions when conforming to POSIX.  If 0, we
+     are not within a bracket expression.  If -1, we are within a
+     bracket expression but are not within [.FOO.], [=FOO=],
+     or [:FOO:].  Otherwise, this is the '.', '=', or ':'
+     respectively within these three types of subexpressions.  */
+  int bracket_state = 0;
+
+  /* MBRLEN results are compared against (size_t) -1 and (size_t) -2,
+     so keep the value in a size_t instead of an int.  */
+  size_t mbclen;
+  mbstate_t cur_stat;
+  memset(&cur_stat, 0, sizeof(mbstate_t));
+
+  while (p < bufend)
+    {
+      int c;
+      mbclen = MBRLEN (p, bufend - p, &cur_stat);
+      if (mbclen != 1)
+        {
+          /* An invalid sequence, or a truncated multibyte character.
+             We treat it as a singlebyte character.  */
+          if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+            mbclen = 1;
+
+          memmove (q, p, mbclen);
+          q += mbclen;
+          p += mbclen;
+          continue;
+        }
+
+      if (*p == '\\' && p+1 < bufend && bracket_state == 0)
+        switch ( (c = *++p) )
+          {
+#if defined __STDC__ && __STDC__-0
+          case 'a': *q++ = '\a'; p++; continue;
+#else /* Not STDC; we'll just assume ASCII */
+          case 'a': *q++ = '\007'; p++; continue;
+#endif
+          /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
+          case 'f': *q++ = '\f'; p++; continue;
+          case '\n': /*fall through */
+          case 'n': *q++ = '\n'; p++; continue;
+          case 'r': *q++ = '\r'; p++; continue;
+          case 't': *q++ = '\t'; p++; continue;
+          case 'v': *q++ = '\v'; p++; continue;
+
+          case 'd': /* decimal byte */
+            p = convert_number(q, p+1, bufend, 10, 3, 'd');
+            q++;
+            continue;
+
+          case 'x': /* hexadecimal byte */
+            p = convert_number(q, p+1, bufend, 16, 2, 'x');
+            q++;
+            continue;
+
+#ifdef REG_PERL
+          case '0': case '1': case '2': case '3':
+          case '4': case '5': case '6': case '7':
+            if ((extended_regexp_flags & REG_PERL)
+                && p+1 < bufend
+                && p[1] >= '0' && p[1] <= '9')
+              {
+                p = convert_number(q, p, bufend, 8, 3, *p);
+                q++;
+              }
+            else
+              {
+                /* we just pass the \ up one level for interpretation;
+                   the digit itself is copied by the next iteration */
+                if (buftype != TEXT_BUFFER)
+                  *q++ = '\\';
+              }
+
+            continue;
+
+          case 'o': /* octal byte */
+            if (!(extended_regexp_flags & REG_PERL))
+              {
+                p = convert_number(q, p+1, bufend, 8, 3, 'o');
+                q++;
+              }
+            else
+              {
+                /* we just pass the \ up one level for interpretation */
+                if (buftype != TEXT_BUFFER)
+                  *q++ = '\\';
+              }
+
+            continue;
+#else
+          case 'o': /* octal byte */
+            p = convert_number(q, p+1, bufend, 8, 3, 'o');
+            q++;
+            continue;
+#endif
+
+          case 'c':
+            if (++p < bufend)
+              {
+                /* toupper() is only defined for EOF and values that fit
+                   in an unsigned char, so never pass it a plain char.  */
+                *q++ = toupper((unsigned char) *p) ^ 0x40;
+                p++;
+                continue;
+              }
+            else
+              {
+                /* we just pass the \ up one level for interpretation */
+                if (buftype != TEXT_BUFFER)
+                  *q++ = '\\';
+                continue;
+              }
+
+          default:
+            /* we just pass the \ up one level for interpretation */
+            if (buftype != TEXT_BUFFER)
+              *q++ = '\\';
+            break;
+          }
+      else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
+        switch (*p)
+          {
+          case '[':
+            if (!bracket_state)
+              bracket_state = -1;
+            break;
+
+          case ':':
+          case '.':
+          case '=':
+            /* "[:", "[." or "[=" opens a nested class inside brackets. */
+            if (bracket_state == -1 && p[-1] == '[')
+              bracket_state = *p;
+            break;
+
+          case ']':
+            if (bracket_state == 0)
+              ;
+            else if (bracket_state == -1)
+              bracket_state = 0;
+            else if (p[-2] != bracket_state && p[-1] == bracket_state)
+              bracket_state = -1;
+            break;
+          }
+
+      /* Ordinary byte (or the byte following an untranslated backslash). */
+      *q++ = *p++;
+    }
+  return (size_t)(q - buf);
+}
+
+
+/* Compile the sed commands held in the LEN bytes at `str' (a script
+   given on the command line) and append them to `cur_program'.
+   Returns whatever compile_program() returns.  */
+struct vector *
+compile_string(cur_program, str, len)
+  struct vector *cur_program;
+  char *str;
+  size_t len;
+{
+  /* Number of command-line scripts seen so far. */
+  static countT string_expr_count = 0;
+  struct vector *compiled;
+
+  string_expr_count++;
+
+  /* Read the program from memory rather than from a file. */
+  prog.file = NULL;
+  prog.base = CAST(unsigned char *)str;
+  prog.cur = prog.base;
+  prog.end = prog.cur + len;
+
+  /* Position information used when reporting errors. */
+  cur_input.line = 0;
+  cur_input.name = NULL;
+  cur_input.string_expr_count = string_expr_count;
+
+  compiled = compile_program(cur_program);
+
+  /* The caller's buffer must not be referenced after we return. */
+  prog.base = NULL;
+  prog.cur = NULL;
+  prog.end = NULL;
+
+  first_script = false;
+  return compiled;
+}
+
+/* `cmdfile' is the name of a file containing sed commands; the name "-"
+   selects standard input.  Read the commands in and add them to the end
+   of `cur_program'.  Returns whatever compile_program() returns.  */
+struct vector *
+compile_file(cur_program, cmdfile)
+  struct vector *cur_program;
+  const char *cmdfile;
+{
+  struct vector *ret;
+
+  prog.file = stdin;
+  if (cmdfile[0] != '-' || cmdfile[1] != '\0')
+    prog.file = ck_fopen(cmdfile, "rt", true);
+
+  /* Position information used when reporting errors. */
+  cur_input.line = 1;
+  cur_input.name = cmdfile;
+  cur_input.string_expr_count = 0;
+
+  ret = compile_program(cur_program);
+
+  /* stdin is left open; anything we opened ourselves is closed. */
+  if (prog.file != stdin)
+    ck_fclose(prog.file);
+  prog.file = NULL;
+
+  first_script = false;
+  return ret;
+}
+
+/* Run the checks that need the complete program: report an unclosed
+   "{", finish a pending a/c/i text, resolve every jump to the index of
+   its label (or to the end of the program for an empty label name),
+   then drop the label lists and the recorded file names.  */
+void
+check_final_program(program)
+  struct vector *program;
+{
+  struct sed_label *jump;
+  struct sed_label *target;
+  struct output *f;
+
+  /* do all "{"s have a corresponding "}"? */
+  if (blocks)
+    {
+      /* update info for error reporting: */
+      MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
+      bad_prog(_(EXCESS_OPEN_BRACE));
+    }
+
+  /* was the final command an unterminated a/c/i command? */
+  if (pending_text)
+    {
+      old_text_buf->text_length = size_buffer(pending_text);
+      old_text_buf->text = MEMDUP(get_buffer(pending_text),
+                                  old_text_buf->text_length, char);
+      free_buffer(pending_text);
+      pending_text = NULL;
+    }
+
+  /* Backpatch the jumps; release_label() hands back the next entry. */
+  jump = jumps;
+  while (jump)
+    {
+      target = labels;
+      while (target && strcmp(target->name, jump->name) != 0)
+        target = target->next;
+
+      if (target)
+        program->v[jump->v_index].x.jump_index = target->v_index;
+      else
+        {
+          /* Only a jump without a label name may go unresolved. */
+          if (*jump->name)
+            panic(_("can't find label for jump to `%s'"), jump->name);
+          program->v[jump->v_index].x.jump_index = program->v_length;
+        }
+
+      jump = release_label(jump);
+    }
+  jumps = NULL;
+
+  target = labels;
+  while (target)
+    target = release_label(target);
+  labels = NULL;
+
+  /* There is no longer a need to track file names: */
+  for (f = file_read; f; f = f->link)
+    {
+      if (!f->name)
+        continue;
+      FREE(f->name);
+      f->name = NULL;
+    }
+
+  for (f = file_write; f; f = f->link)
+    {
+      if (!f->name)
+        continue;
+      FREE(f->name);
+      f->name = NULL;
+    }
+}
+
+/* Rewind every stream on the file_read list that is currently open. */
+void
+rewind_read_files()
+{
+  struct output *f = file_read;
+
+  while (f)
+    {
+      if (f->fp)
+        rewind(f->fp);
+      f = f->link;
+    }
+}
+
+/* Release the resources held by this module: close every stream on the
+   file_read and file_write lists and forget both lists.  When
+   DEBUG_LEAKS is defined the obstack is released as well.  */
+void
+finish_program(program)
+  struct vector *program;
+{
+  struct output *f, *following;
+
+  /* close all files... */
+  f = file_read;
+  while (f)
+    {
+      following = f->link;
+      if (f->fp)
+        ck_fclose(f->fp);
+#if 0
+      /* We use obstacks. */
+      FREE(f);
+#endif
+      f = following;
+    }
+
+  f = file_write;
+  while (f)
+    {
+      following = f->link;
+      if (f->fp)
+        ck_fclose(f->fp);
+#if 0
+      /* We use obstacks. */
+      FREE(f);
+#endif
+      f = following;
+    }
+
+  file_read = file_write = NULL;
+
+#ifdef DEBUG_LEAKS
+  obstack_free (&obs, NULL);
+#endif /*DEBUG_LEAKS*/
+}
diff --git a/sed/execute.c b/sed/execute.c
new file mode 100644
index 0000000..005a063
--- /dev/null
+++ b/sed/execute.c
@@ -0,0 +1,1747 @@
+
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/
+#define INITIAL_BUFFER_SIZE 50
+#define FREAD_BUFFER_SIZE 8192
+
+#include "sed.h"
+
+#include <stdio.h>
+#include <ctype.h>
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#ifdef __GNUC__
+# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
+ /* silence warning about unused parameter even for "gcc -W -Wunused" */
+# define UNUSED __attribute__((unused))
+# endif
+#endif
+#ifndef UNUSED
+# define UNUSED
+#endif
+
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#else
+# include <string.h>
+#endif /*HAVE_STRINGS_H*/
+#ifdef HAVE_MEMORY_H
+# include <memory.h>
+#endif
+
+#ifndef HAVE_STRCHR
+# define strchr index
+# define strrchr rindex
+#endif
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#include <sys/stat.h>
+
+
+/* Sed operates a line at a time.
+
+ A struct line is a growable byte buffer.  `text' is the start of the
+ allocation and `active' points at or after it; the bytes in between
+ are the "inactive" part, which resize_line() and line_copy() discard
+ when convenient.  `alloc' counts only the room available starting at
+ `active': resize_line() reallocates inactive + alloc bytes. */
+struct line {
+ char *text; /* Pointer to line allocated by malloc. */
+ char *active; /* Pointer to non-consumed part of text. */
+ size_t length; /* Length of text (or active, if used). */
+ size_t alloc; /* Allocated space for active. */
+ bool chomped; /* Was a trailing newline dropped? */
+#ifdef HAVE_MBRTOWC
+ mbstate_t mbstate; /* Multibyte conversion state, cleared by line_init()
+ and advanced by str_append() as bytes are added. */
+#endif
+};
+
+/* A queue of text to write out at the end of a cycle
+ (filled by the "a", "r" and "R" commands.)
+ Entries are created by next_append_slot(), written out by
+ dump_append_queue() and released by release_append_queue(). */
+struct append_queue {
+ const char *fname; /* If non-NULL, a file whose contents are copied to the output. */
+ char *text; /* If non-NULL, textlen bytes written to the output. */
+ size_t textlen; /* Number of bytes at `text'. */
+ struct append_queue *next; /* Next entry, or NULL at the tail. */
+ bool free; /* If true, `text' is freed together with the entry. */
+};
+
+/* State information for the input stream. */
+struct input {
+ /* The list of yet-to-be-opened files. It is invalid for file_list
+ to be NULL. When *file_list is NULL we are currently processing
+ the last file. */
+
+ char **file_list;
+
+ /* Count of files we failed to open. */
+ countT bad_count;
+
+ /* Current input line number (over all files). */
+ countT line_number;
+
+ /* True if we'll reset line numbers and addresses before
+ starting to process the next (possibly the first) file. */
+ bool reset_at_next_file;
+
+ /* Function to read one line. If FP is NULL, read_fn better not
+ be one which uses fp; in particular, read_always_fail() is
+ recommended. */
+ bool (*read_fn) P_((struct input *)); /* read one line */
+
+ char *out_file_name; /* In-place editing only: name of the temporary
+ output file, filled in through ck_mkstemp() by open_next_file() and
+ freed by closedown(). */
+
+ const char *in_file_name; /* In-place editing only: name of the file
+ being edited, as set by open_next_file(). */
+
+ /* if NULL, none of the following are valid */
+ FILE *fp;
+
+ bool no_buffering; /* NOTE(review): not referenced in this part of the
+ file; meaning to be confirmed against its users. */
+};
+
+
+/* Have we done any replacements lately? This is used by the `t' command.
+ read_pattern_space() clears it each time a new line is read. */
+static bool replaced = false;
+
+/* The current output file (stdout if -i is not being used). */
+static struct output output_file;
+
+/* The `current' input line. */
+static struct line line;
+
+/* An input line used to accumulate the result of the s and e commands. */
+static struct line s_accum;
+
+/* An input line that's been stored for later use by the program */
+static struct line hold;
+
+/* The buffered input look-ahead. The only field that should be
+ used outside of read_mem_line() or line_init() is buffer.length. */
+static struct line buffer;
+
+static struct append_queue *append_head = NULL; /* first queued entry, or NULL when the queue is empty */
+static struct append_queue *append_tail = NULL; /* last queued entry, where next_append_slot() links new ones */
+
+
+#ifdef BOOTSTRAP
+/* We can't be sure that the system we're bootstrapping on has
+   memchr(), and ../lib/memchr.c requires configuration knowledge
+   about how many bits are in a `long'.  This implementation
+   is far from ideal, but it should get us up-and-limping well
+   enough to run the configure script, which is all that matters.
+*/
+# ifdef memchr
+#  undef memchr
+# endif
+# define memchr bootstrap_memchr
+
+/* Return a pointer to the first of the N bytes at S that equals C
+   (converted to unsigned char), or a null pointer if there is none.  */
+static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
+static VOID *
+bootstrap_memchr(s, c, n)
+  const VOID *s;
+  int c;
+  size_t n;
+{
+  /* memchr() compares bytes as unsigned char.  Comparing a plain
+     (possibly signed) char against the int C would never match byte
+     values of 0x80 and above.  */
+  const unsigned char *p = (const unsigned char *)s;
+  unsigned char wanted = (unsigned char)c;
+
+  for (; n-- > 0; ++p)
+    if (*p == wanted)
+      return (VOID *)p;
+  return CAST(VOID *)0;
+}
+#endif /*BOOTSTRAP*/
+
+/* Make room in LB for at least LEN bytes of active text, padding the
+   allocation for future growth so that realloc() is not called too
+   often.  */
+static void resize_line P_((struct line *, size_t));
+static void
+resize_line(lb, len)
+  struct line *lb;
+  size_t len;
+{
+  size_t grown;
+  int skipped = lb->active - lb->text;
+
+  /* If the inactive part has got to more than two thirds of the buffer,
+     slide the active text down over it; that alone may be enough.  */
+  if (skipped > lb->alloc * 2)
+    {
+      MEMMOVE(lb->text, lb->active, lb->length);
+      lb->alloc += lb->active - lb->text;
+      lb->active = lb->text;
+      skipped = 0;
+
+      if (lb->alloc > len)
+        return;
+    }
+
+  /* Double the size, but never end up below LEN or the initial size. */
+  grown = lb->alloc * 2;
+  if (grown < len)
+    grown = len;
+  if (grown < INITIAL_BUFFER_SIZE)
+    grown = INITIAL_BUFFER_SIZE;
+  lb->alloc = grown;
+
+  lb->text = REALLOC(lb->text, skipped + lb->alloc, char);
+  lb->active = lb->text + skipped;
+}
+
+/* Append `length' bytes from `string' to the line `to', growing the
+   line if necessary.  In a multibyte locale (mb_cur_max != 1) the
+   line's shift state is advanced over the appended bytes as well.  */
+static void str_append P_((struct line *, const char *, size_t));
+static void
+str_append(to, string, length)
+  struct line *to;
+  const char *string;
+  size_t length;
+{
+  size_t new_length = to->length + length;
+
+  if (to->alloc < new_length)
+    resize_line(to, new_length);
+  MEMCPY(to->active + to->length, string, length);
+  to->length = new_length;
+
+#ifdef HAVE_MBRTOWC
+  if (mb_cur_max == 1)
+    return;
+
+  while (length)
+    {
+      int n = MBRLEN (string, length, &to->mbstate);
+
+      /* An invalid sequence is treated like a singlebyte character. */
+      if (n == -1)
+        {
+          memset (&to->mbstate, 0, sizeof (to->mbstate));
+          n = 1;
+        }
+
+      if (n > 0)
+        {
+          /* Step over the character just scanned.  Without advancing
+             STRING the same leading bytes would be scanned again and
+             again, leaving a wrong shift state behind.  */
+          string += n;
+          length -= n;
+        }
+      else
+        /* Incomplete sequence (or a null character): stop here and
+           keep whatever state MBRLEN left in to->mbstate.  */
+        break;
+    }
+#endif
+}
+
+/* Append `length' bytes from `string' to the line `to', applying the
+   case conversion selected by `type': REPL_UPPERCASE_FIRST and
+   REPL_LOWERCASE_FIRST convert only the first character, while
+   REPL_UPPERCASE and REPL_LOWERCASE convert every (remaining) character.
+   With REPL_ASIS the text is appended unchanged.  */
+static void str_append_modified P_((struct line *, const char *, size_t,
+                                    enum replacement_types));
+static void
+str_append_modified(to, string, length, type)
+  struct line *to;
+  const char *string;
+  size_t length;
+  enum replacement_types type;
+{
+#ifdef HAVE_MBRTOWC
+  mbstate_t from_stat;
+#else
+  size_t old_length = to->length;
+  char *start, *end;
+#endif
+
+  if (length == 0)
+    return;
+
+#ifdef HAVE_MBRTOWC
+  if (type == REPL_ASIS)
+    {
+      str_append(to, string, length);
+      return;
+    }
+
+  /* A converted character may need more bytes than the original one,
+     so reserve room for the worst case up front.  */
+  if (to->alloc - to->length < length * mb_cur_max)
+    resize_line(to, to->length + length * mb_cur_max);
+
+  MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
+  while (length)
+    {
+      wchar_t wc;
+      bool rest_verbatim = false;
+      int n = MBRTOWC (&wc, string, length, &from_stat);
+
+      /* An invalid sequence is treated like a singlebyte character.
+         No wide character was produced, so there is nothing to convert:
+         copy the byte as it is and let it use up a pending \u or \l.  */
+      if (n == -1)
+        {
+          memset (&from_stat, 0, sizeof (from_stat));
+          type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
+          if (type == REPL_ASIS)
+            {
+              str_append(to, string, length);
+              return;
+            }
+
+          str_append(to, string, 1);
+          memset (&to->mbstate, 0, sizeof (to->mbstate));
+          string++, length--;
+          continue;
+        }
+
+      if (n > 0)
+        string += n, length -= n;
+      else
+        {
+          /* Incomplete sequence, copy it manually. */
+          str_append(to, string, length);
+          return;
+        }
+
+      /* Convert the first character specially... */
+      if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
+        {
+          if (type & REPL_UPPERCASE_FIRST)
+            wc = towupper(wc);
+          else
+            wc = towlower(wc);
+
+          type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
+
+          /* With no \U or \L in effect the rest is copied unchanged,
+             but only after the converted first character is stored.  */
+          rest_verbatim = (type == REPL_ASIS);
+        }
+
+      else if (type & REPL_UPPERCASE)
+        wc = towupper(wc);
+      else
+        wc = towlower(wc);
+
+      /* Copy the new wide character to the end of the string. */
+      n = wcrtomb (to->active + to->length, wc, &to->mbstate);
+      if (n == -1)
+        {
+          fprintf (stderr, "Case conversion produced an invalid character!");
+          abort ();
+        }
+      to->length += n;
+
+      if (rest_verbatim)
+        {
+          str_append(to, string, length);
+          return;
+        }
+    }
+#else
+  str_append(to, string, length);
+  start = to->active + old_length;
+  end = start + length;
+
+  /* The <ctype.h> functions are only defined for EOF and values that
+     fit in an unsigned char, hence the casts below.  */
+
+  /* Now do the required modifications.  First \[lu]... */
+  if (type & REPL_UPPERCASE_FIRST)
+    {
+      *start = toupper((unsigned char) *start);
+      start++;
+      type &= ~REPL_UPPERCASE_FIRST;
+    }
+  else if (type & REPL_LOWERCASE_FIRST)
+    {
+      *start = tolower((unsigned char) *start);
+      start++;
+      type &= ~REPL_LOWERCASE_FIRST;
+    }
+
+  if (type == REPL_ASIS)
+    return;
+
+  /* ...and then \[LU] */
+  if (type == REPL_UPPERCASE)
+    for (; start != end; start++)
+      *start = toupper((unsigned char) *start);
+  else
+    for (; start != end; start++)
+      *start = tolower((unsigned char) *start);
+#endif
+}
+
+/* Set up BUF as an empty line with room for INITIAL_SIZE bytes. */
+static void line_init P_((struct line *, size_t initial_size));
+static void
+line_init(buf, initial_size)
+  struct line *buf;
+  size_t initial_size;
+{
+  char *storage = MALLOC(initial_size, char);
+
+  buf->text = storage;
+  buf->active = storage;	/* nothing has been consumed yet */
+  buf->length = 0;
+  buf->alloc = initial_size;
+  buf->chomped = true;
+
+#ifdef HAVE_MBRTOWC
+  /* Start out in the initial shift state. */
+  memset (&buf->mbstate, 0, sizeof (buf->mbstate));
+#endif
+}
+
+/* Replace the contents of the line `to' with a copy of the active part
+   of the line `from'.  The previous contents of `to' are lost.  */
+static void line_copy P_((struct line *from, struct line *to));
+static void
+line_copy(from, to)
+  struct line *from;
+  struct line *to;
+{
+  size_t needed = from->length;
+
+  /* The whole destination buffer is reused, inactive part included. */
+  to->alloc += to->active - to->text;
+
+  if (to->alloc < needed)
+    {
+      size_t grown = to->alloc * 2;
+
+      if (grown < needed)
+        grown = needed;
+      if (grown < INITIAL_BUFFER_SIZE)
+        grown = INITIAL_BUFFER_SIZE;
+      to->alloc = grown;
+
+      /* The old text is about to be overwritten anyway, so a fresh
+         allocation avoids the copy that REALLOC() might make.  */
+      FREE(to->text);
+      to->text = MALLOC(to->alloc, char);
+    }
+
+  to->active = to->text;
+  to->length = needed;
+  to->chomped = from->chomped;
+  MEMCPY(to->active, from->active, needed);
+
+#ifdef HAVE_MBRTOWC
+  MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
+#endif
+}
+
+/* Add a newline and then the active text of the line `from' to the end
+   of the line `to'.  `to' takes over the chomped flag (and, in
+   multibyte builds, the shift state) of `from'.  */
+static void line_append P_((struct line *from, struct line *to));
+static void
+line_append(from, to)
+  struct line *from;
+  struct line *to;
+{
+  static const char separator[] = "\n";
+
+  str_append(to, separator, 1);
+  str_append(to, from->active, from->length);
+
+  to->chomped = from->chomped;
+#ifdef HAVE_MBRTOWC
+  MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
+#endif
+}
+
+/* Swap two "struct line" buffers.  Only the structures themselves are
+   exchanged; the text they point to is not copied.  */
+static void line_exchange P_((struct line *, struct line *));
+static void
+line_exchange(a, b)
+  struct line *a;
+  struct line *b;
+{
+  struct line saved = *a;
+
+  *a = *b;
+  *b = saved;
+}
+
+
+/* A read function that never delivers a line.  Installing it as
+   input->read_fn makes read_pattern_space() move on to the next file.  */
+static bool read_always_fail P_((struct input *));
+static bool
+read_always_fail(input)
+  struct input *input UNUSED;
+{
+  const bool got_line = false;
+
+  return got_line;
+}
+
+/* Read one line from input->fp and append it, without its trailing
+   newline, to the pattern space `line'.  Returns false when
+   ck_getline() delivers nothing.  */
+static bool read_file_line P_((struct input *));
+static bool
+read_file_line(input)
+  struct input *input;
+{
+  /* Buffer owned by ck_getline() and reused from call to call. */
+  static char *getline_buf;
+  static size_t getline_size;
+
+  long nread = ck_getline (&getline_buf, &getline_size, input->fp);
+
+  if (nread <= 0)
+    return false;
+
+  if (getline_buf[nread - 1] != '\n')
+    /* The input ended without a newline: remember that. */
+    line.chomped = false;
+  else
+    /* Remove the trailing new-line that is left by getline. */
+    nread--;
+
+  str_append(&line, getline_buf, nread);
+  return true;
+}
+
+
+/* If a newline is still owed to OUTF (its missing_newline flag is set),
+   write it now and clear the flag.  */
+static inline void output_missing_newline P_((struct output *));
+static inline void
+output_missing_newline(outf)
+  struct output *outf;
+{
+  if (!outf->missing_newline)
+    return;
+
+  ck_fwrite("\n", 1, 1, outf->fp);
+  outf->missing_newline = false;
+}
+
+/* Flush FP, except that stdout is flushed only when unbuffered output
+   was requested.  */
+static inline void flush_output P_((FILE *));
+static inline void
+flush_output(fp)
+  FILE *fp;
+{
+  if (fp == stdout && !unbuffered_output)
+    return;
+
+  ck_fflush(fp);
+}
+
+/* Write the LENGTH bytes at TEXT to OUTF, preceded by any newline that
+   is still owed to it.  If NL is true a newline follows the text;
+   otherwise the newline is recorded as owed.  */
+static void output_line P_((const char *, size_t, bool, struct output *));
+static void
+output_line(text, length, nl, outf)
+  const char *text;
+  size_t length;
+  bool nl;
+  struct output *outf;
+{
+  FILE *stream;
+
+  output_missing_newline(outf);
+  stream = outf->fp;
+
+  if (length != 0)
+    ck_fwrite(text, 1, length, stream);
+
+  if (!nl)
+    outf->missing_newline = true;
+  else
+    ck_fwrite("\n", 1, 1, stream);
+
+  flush_output(stream);
+}
+
+/* Allocate an empty entry, link it at the tail of the append queue and
+   return it for the caller to fill in.  */
+static struct append_queue *next_append_slot P_((void));
+static struct append_queue *
+next_append_slot()
+{
+  struct append_queue *slot = MALLOC(1, struct append_queue);
+
+  slot->fname = NULL;
+  slot->text = NULL;
+  slot->textlen = 0;
+  slot->next = NULL;
+  slot->free = false;
+
+  if (!append_tail)
+    append_head = slot;		/* the queue was empty */
+  else
+    append_tail->next = slot;
+
+  append_tail = slot;
+  return slot;
+}
+
+/* Free every entry of the append queue (and the text of those entries
+   whose `free' flag is set), leaving the queue empty.  */
+static void release_append_queue P_((void));
+static void
+release_append_queue()
+{
+  struct append_queue *entry = append_head;
+
+  while (entry)
+    {
+      struct append_queue *following = entry->next;
+
+      if (entry->free)
+        FREE(entry->text);
+      FREE(entry);
+
+      entry = following;
+    }
+
+  append_head = append_tail = NULL;
+}
+
+/* Write every queued entry to the current output file, first its text
+   (if any) and then the contents of its file (if any), then flush the
+   output and empty the queue.  */
+static void dump_append_queue P_((void));
+static void
+dump_append_queue()
+{
+  struct append_queue *entry;
+  char chunk[FREAD_BUFFER_SIZE];
+
+  output_missing_newline(&output_file);
+
+  for (entry = append_head; entry; entry = entry->next)
+    {
+      FILE *fp;
+      size_t got;
+
+      if (entry->text)
+        ck_fwrite(entry->text, 1, entry->textlen, output_file.fp);
+
+      if (!entry->fname)
+        continue;
+
+      /* "If _fname_ does not exist or cannot be read, it shall
+         be treated as if it were an empty file, causing no error
+         condition."  IEEE Std 1003.2-1992
+         So, don't fail.  */
+      fp = ck_fopen(entry->fname, "r", false);
+      if (!fp)
+        continue;
+
+      for (;;)
+        {
+          got = ck_fread(chunk, 1, sizeof chunk, fp);
+          if (got == 0)
+            break;
+          ck_fwrite(chunk, 1, got, output_file.fp);
+        }
+      ck_fclose(fp);
+    }
+
+  flush_output(output_file.fp);
+  release_append_queue();
+}
+
+
+/* Compute the name of the backup file for in-place editing: a copy of
+   in_place_extension in which every '*' is replaced by NAME.  The
+   result comes from xmalloc() and is to be freed by the caller.  */
+static char *get_backup_file_name P_((const char *));
+static char *
+get_backup_file_name(name)
+  const char *name;
+{
+  char *old_asterisk, *asterisk, *backup, *p;
+  size_t name_length = strlen(name);
+  size_t backup_length = strlen(in_place_extension);
+
+  /* Compute the length of the backup file: each asterisk (one byte,
+     already counted) turns into name_length bytes.  The scan starts at
+     the extension itself; no pointer to before it is ever formed.  */
+  for (old_asterisk = in_place_extension;
+       (asterisk = strchr(old_asterisk, '*')) != NULL;
+       old_asterisk = asterisk + 1)
+    backup_length = backup_length - 1 + name_length;
+
+  p = backup = xmalloc(backup_length + 1);
+
+  /* Each iteration gobbles up to an asterisk */
+  for (old_asterisk = in_place_extension;
+       (asterisk = strchr(old_asterisk, '*')) != NULL;
+       old_asterisk = asterisk + 1)
+    {
+      MEMCPY (p, old_asterisk, asterisk - old_asterisk);
+      p += asterisk - old_asterisk;
+      strcpy (p, name);
+      p += name_length;
+    }
+
+  /* Tack on what's after the last asterisk */
+  strcpy (p, old_asterisk);
+  return backup;
+}
+
+/* Initialize a struct input for the named file.
+
+ NAME "-" selects standard input, unless in-place editing is in effect.
+ If the file cannot be opened, a message is printed to stderr,
+ input->bad_count is incremented and input->fp is left NULL.
+ On success input->read_fn is set to read_file_line. When
+ in_place_extension is set, output_file is additionally redirected to
+ a temporary file created in the directory of NAME; otherwise
+ output_file.fp is stdout. */
+static void open_next_file P_((const char *name, struct input *));
+static void
+open_next_file(name, input)
+ const char *name;
+ struct input *input;
+{
+ buffer.length = 0; /* forget any buffered look-ahead */
+
+ if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
+ {
+ clearerr(stdin); /* clear any stale EOF indication */
+ input->fp = stdin;
+ }
+ else if ( ! (input->fp = ck_fopen(name, "r", false)) )
+ {
+ const char *ptr = strerror(errno);
+ fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
+ input->read_fn = read_always_fail; /* a redundancy */
+ ++input->bad_count;
+ return;
+ }
+
+ input->read_fn = read_file_line;
+
+ if (in_place_extension)
+ {
+ int output_fd;
+ char *tmpdir = ck_strdup(name), *p;
+ struct stat st;
+
+ /* Keep only the directory part of NAME (up to and including the
+ last '/'), or use "." when NAME has no directory part. */
+ if (p = strrchr(tmpdir, '/'))
+ *(p + 1) = 0;
+ else
+ strcpy(tmpdir, ".");
+
+ input->in_file_name = name;
+
+ if (isatty (fileno (input->fp)))
+ panic(_("couldn't edit %s: is a terminal"), input->in_file_name);
+
+ fstat (fileno (input->fp), &st); /* NOTE(review): the result is not
+ checked; if fstat() failed, st would be used uninitialized below. */
+ if (!S_ISREG (st.st_mode))
+ panic(_("couldn't edit %s: not a regular file"), input->in_file_name);
+
+ output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
+ output_file.missing_newline = false;
+ free (tmpdir);
+
+ if (!output_file.fp)
+ panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
+
+ output_fd = fileno (output_file.fp);
+#ifdef HAVE_FCHMOD
+ fchmod (output_fd, st.st_mode); /* give the temporary file the mode of the original */
+#endif
+#ifdef HAVE_FCHOWN
+ if (fchown (output_fd, st.st_uid, st.st_gid) == -1) /* owner and group; if that fails... */
+ fchown (output_fd, -1, st.st_gid); /* ...try to set the group alone */
+#endif
+ }
+ else
+ output_file.fp = stdout;
+}
+
+
+/* Clean up an input stream that we are done with: disable reading,
+   close the stream (stdin excepted) and, when editing in place, close
+   the temporary output file and rename it over the input file, keeping
+   a backup unless the extension is just "*".  */
+static void closedown P_((struct input *));
+static void
+closedown(input)
+  struct input *input;
+{
+  input->read_fn = read_always_fail;
+
+  if (input->fp == NULL)
+    return;
+
+  /* stdin can be reused on tty and tape devices */
+  if (input->fp != stdin)
+    ck_fclose(input->fp);
+
+  if (in_place_extension && output_file.fp != NULL)
+    {
+      bool keep_backup = strcmp(in_place_extension, "*") != 0;
+
+      ck_fclose (output_file.fp);
+
+      if (keep_backup)
+        {
+          char *backup_file_name = get_backup_file_name(input->in_file_name);
+
+          ck_rename (input->in_file_name, backup_file_name,
+                     input->out_file_name);
+          free (backup_file_name);
+        }
+
+      ck_rename (input->out_file_name, input->in_file_name,
+                 input->out_file_name);
+      free (input->out_file_name);
+    }
+
+  input->fp = NULL;
+}
+
+/* Put the range state of every command in VEC back to its starting
+   value: RANGE_ACTIVE when the first address is the number 0 (of type
+   ADDR_IS_NUM or ADDR_IS_NUM_MOD), RANGE_INACTIVE otherwise.  */
+static void reset_addresses P_((struct vector *));
+static void
+reset_addresses(vec)
+  struct vector *vec;
+{
+  struct sed_cmd *cmd = vec->v;
+  int remaining = vec->v_length;
+
+  while (remaining-- != 0)
+    {
+      bool starts_active = (cmd->a1
+                            && (cmd->a1->addr_type == ADDR_IS_NUM
+                                || cmd->a1->addr_type == ADDR_IS_NUM_MOD)
+                            && cmd->a1->addr_number == 0);
+
+      if (starts_active)
+        cmd->range_state = RANGE_ACTIVE;
+      else
+        cmd->range_state = RANGE_INACTIVE;
+
+      cmd++;
+    }
+}
+
+/* Read the next line of input into the pattern space, moving on to the
+   next input file whenever the current one is exhausted.  Unless APPEND
+   is true the pattern space is emptied first.  Returns false if there
+   is nothing left to read.  */
+static bool read_pattern_space P_((struct input *, struct vector *, bool));
+static bool
+read_pattern_space(input, the_program, append)
+  struct input *input;
+  struct vector *the_program;
+  bool append;
+{
+  if (append_head) /* redundant test to optimize for common case */
+    dump_append_queue();
+
+  replaced = false;
+  if (!append)
+    line.length = 0;
+  line.chomped = true; /* default, until proved otherwise */
+
+  for (;;)
+    {
+      if ((*input->read_fn)(input))
+        break;
+
+      /* The current file has nothing more to give. */
+      closedown(input);
+
+      if (!*input->file_list)
+        return false;
+
+      if (input->reset_at_next_file)
+        {
+          input->line_number = 0;
+          reset_addresses (the_program);
+          rewind_read_files ();
+
+          /* If doing in-place editing, we will never append the
+             new-line to this file; but if the output goes to stdout,
+             we might still have to output the missing new-line.  */
+          if (in_place_extension)
+            output_file.missing_newline = false;
+
+          input->reset_at_next_file = separate_files;
+        }
+
+      open_next_file (*input->file_list++, input);
+    }
+
+  ++input->line_number;
+  return true;
+}
+
+
+/* Close the current input and open the following files one after the
+   other until one of them has at least one byte to read.  Returns false
+   if such a file is found (it is left open, with the byte pushed back),
+   true if the file list runs out first.  */
+static bool last_file_with_data_p P_((struct input *));
+static bool
+last_file_with_data_p(input)
+  struct input *input;
+{
+  int ch;
+
+  for (;;)
+    {
+      closedown(input);
+      if (!*input->file_list)
+        return true;
+
+      open_next_file(*input->file_list++, input);
+      if (!input->fp)
+        continue;	/* could not be opened, try the next one */
+
+      ch = getc(input->fp);
+      if (ch == EOF)
+        continue;	/* empty file, try the next one */
+
+      ungetc(ch, input->fp);
+      return false;
+    }
+}
+
+/* Determine if we match the `$' address, that is, whether the line just
+   read is the last one.  With separate_files set, the end of each file
+   counts; otherwise the remaining files must hold no data either.  */
+static bool test_eof P_((struct input *));
+static bool
+test_eof(input)
+  struct input *input;
+{
+  int ch;
+
+  /* Buffered look-ahead means there is more to come. */
+  if (buffer.length)
+    return false;
+
+  /* No stream, a stream already at EOF, or one that hits EOF when we
+     peek at it: the current file is exhausted.  */
+  if (!input->fp
+      || feof(input->fp)
+      || (ch = getc(input->fp)) == EOF)
+    return separate_files || last_file_with_data_p(input);
+
+  /* There was a byte: give it back. */
+  ungetc(ch, input->fp);
+  return false;
+}
+
+/* Return non-zero if the current line matches the single address
+   pointed to by `addr'.  ADDR_IS_NUM is not accepted here; the caller
+   match_address_p() deals with it.  */
+static bool match_an_address_p P_((struct addr *, struct input *));
+static bool
+match_an_address_p(addr, input)
+  struct addr *addr;
+  struct input *input;
+{
+  countT lineno = input->line_number;
+
+  switch (addr->addr_type)
+    {
+    case ADDR_IS_NULL:
+      return true;
+
+    case ADDR_IS_REGEX:
+      return match_regex(addr->addr_regex, line.active, line.length,
+                         0, NULL, 0);
+
+    case ADDR_IS_NUM_MOD:
+      /* Matches addr_number and every addr_step-th line after it. */
+      if (lineno < addr->addr_number)
+        return false;
+      return ((lineno - addr->addr_number) % addr->addr_step) == 0;
+
+    case ADDR_IS_STEP:
+    case ADDR_IS_STEP_MOD:
+      /* reminder: these are only meaningful for a2 addresses */
+      /* a2->addr_number needs to be recomputed each time a1 address
+         matches for the step and step_mod types */
+      return (addr->addr_number <= lineno);
+
+    case ADDR_IS_LAST:
+      return test_eof(input);
+
+    /* ADDR_IS_NUM is handled in match_address_p. */
+    case ADDR_IS_NUM:
+    default:
+      panic("INTERNAL ERROR: bad address type");
+    }
+  /*NOTREACHED*/
+  return false;
+}
+
+/* return non-zero if current address is valid for cmd
+
+ A command without a first address matches every line. A command with
+ only a first address matches when that address matches (a numeric
+ address is compared with input->line_number directly). With two
+ addresses, cmd->range_state tracks the range: while it is not
+ RANGE_ACTIVE the first address is tested and, on a match, the range
+ becomes active; while it is RANGE_ACTIVE the second address is tested
+ and, once it matches, the state becomes RANGE_CLOSED. A range whose
+ first address is numeric is not started again once it is
+ RANGE_CLOSED. */
+static bool match_address_p P_((struct sed_cmd *, struct input *));
+static bool
+match_address_p(cmd, input)
+ struct sed_cmd *cmd;
+ struct input *input;
+{
+ if (!cmd->a1)
+ return true;
+
+ if (cmd->range_state != RANGE_ACTIVE)
+ {
+ /* Find if we are going to activate a range. Handle ADDR_IS_NUM
+ specially: it represent an "absolute" state, it should not
+ be computed like regexes. */
+ if (cmd->a1->addr_type == ADDR_IS_NUM)
+ {
+ if (!cmd->a2)
+ return (input->line_number == cmd->a1->addr_number);
+
+ if (cmd->range_state == RANGE_CLOSED
+ || input->line_number < cmd->a1->addr_number)
+ return false;
+ }
+ else
+ {
+ if (!cmd->a2)
+ return match_an_address_p(cmd->a1, input);
+
+ if (!match_an_address_p(cmd->a1, input))
+ return false;
+ }
+
+ /* Ok, start a new range. */
+ cmd->range_state = RANGE_ACTIVE;
+ switch (cmd->a2->addr_type)
+ {
+ case ADDR_IS_REGEX:
+ /* Always include at least two lines. */
+ return true;
+ case ADDR_IS_NUM:
+ /* Same handling as below, but always include at least one line. */
+ if (input->line_number >= cmd->a2->addr_number)
+ cmd->range_state = RANGE_CLOSED;
+ return true;
+ case ADDR_IS_STEP:
+ cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; /* the range ends addr_step lines from here */
+ return true;
+ case ADDR_IS_STEP_MOD:
+ cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
+ - (input->line_number%cmd->a2->addr_step); /* the next multiple of addr_step after this line;
+ NOTE(review): assumes addr_step is non-zero -- confirm where the address is compiled */
+ return true;
+ default:
+ break; /* other types fall through to the end-of-range test below */
+ }
+ }
+
+ /* cmd->range_state == RANGE_ACTIVE. Check if the range is
+ ending; also handle ADDR_IS_NUM specially in this case. */
+
+ if (cmd->a2->addr_type == ADDR_IS_NUM)
+ {
+ /* If the second address is a line number, and if we got past
+ that line, fail to match (it can happen when you jump
+ over such addresses with `b' and `t'). Use RANGE_CLOSED
+ so that the range is not re-enabled anymore. */
+ if (input->line_number >= cmd->a2->addr_number)
+ cmd->range_state = RANGE_CLOSED;
+
+ return (input->line_number <= cmd->a2->addr_number);
+ }
+
+ /* Other addresses are treated as usual. */
+ if (match_an_address_p(cmd->a2, input))
+ cmd->range_state = RANGE_CLOSED;
+
+ return true; /* the line that ends the range still belongs to it */
+}
+
+
+/* Write the pattern space to the output file in an unambiguous form:
+   printable characters as they are (with the backslash doubled),
+   the usual control characters as \a \b \f \n \r \t \v, and any other
+   byte as a backslash and three octal digits.  The text ends with "$".
+   When LINE_LEN is positive the output is split with a backslash
+   followed by a newline.  */
+static void do_list P_((int line_len));
+static void
+do_list(line_len)
+  int line_len;
+{
+  unsigned char *cursor = CAST(unsigned char *)line.active;
+  countT remaining = line.length;
+  countT width = 0;
+  char obuf[180]; /* just in case we encounter a 512-bit char (;-) */
+  FILE *fp = output_file.fp;
+
+  output_missing_newline(&output_file);
+
+  while (remaining--)
+    {
+      unsigned char ch = *cursor++;
+      char *o = obuf;
+      size_t olen;
+      bool printable;
+
+      /* Some locales define 8-bit characters as printable.  This makes the
+         testsuite fail at 8to7.sed because the `l' command in fact will not
+         convert the 8-bit characters.  */
+#if defined isascii || defined HAVE_ISASCII
+      printable = isascii(ch) && ISPRINT(ch);
+#else
+      printable = ISPRINT(ch);
+#endif
+
+      if (printable)
+        {
+          *o++ = ch;
+          if (ch == '\\')
+            *o++ = '\\';
+        }
+      else
+        {
+          *o++ = '\\';
+          switch (ch)
+            {
+#if defined __STDC__ && __STDC__-0
+            case '\a': *o++ = 'a'; break;
+#else /* Not STDC; we'll just assume ASCII */
+            case 007: *o++ = 'a'; break;
+#endif
+            case '\b': *o++ = 'b'; break;
+            case '\f': *o++ = 'f'; break;
+            case '\n': *o++ = 'n'; break;
+            case '\r': *o++ = 'r'; break;
+            case '\t': *o++ = 't'; break;
+            case '\v': *o++ = 'v'; break;
+            default:
+              sprintf(o, "%03o", ch);
+              o += strlen(o);
+              break;
+            }
+        }
+
+      /* Start a new output line if this character would not fit. */
+      olen = o - obuf;
+      if (width+olen >= line_len && line_len > 0)
+        {
+          ck_fwrite("\\\n", 1, 2, fp);
+          width = 0;
+        }
+      ck_fwrite(obuf, 1, olen, fp);
+      width += olen;
+    }
+
+  ck_fwrite("$\n", 1, 2, fp);
+  flush_output (fp);
+}
+
+
+static enum replacement_types append_replacement P_((struct line *, struct replacement *,
+                                                     struct re_registers *,
+                                                     enum replacement_types));
+
+/* Expand the replacement list P into BUF.  Each element contributes
+   its literal prefix and then, when its subst_id is non-negative, the
+   text of that match register taken from REGS.  REPL_MOD is a case
+   modifier left pending by an earlier expansion; the return value is
+   the modifier still pending when the list is exhausted.  */
+static enum replacement_types
+append_replacement (buf, p, regs, repl_mod)
+  struct line *buf;
+  struct replacement *p;
+  struct re_registers *regs;
+  enum replacement_types repl_mod;
+{
+  for (; p != NULL; p = p->next)
+    {
+      int group = p->subst_id;
+      int own_modifier = p->repl_type & REPL_MODIFIERS;
+      enum replacement_types how;
+
+      /* A \[lu] modifier given earlier and not yet applied is used
+         now, unless this element carries a modifier of its own.  */
+      if (own_modifier)
+        how = p->repl_type;
+      else
+        how = p->repl_type | repl_mod;
+      repl_mod = 0;
+
+      if (p->prefix_length)
+        {
+          str_append_modified(buf, p->prefix, p->prefix_length, how);
+          /* The prefix consumed the modifier.  */
+          how &= ~REPL_MODIFIERS;
+        }
+
+      /* No register reference in this element.  */
+      if (group < 0)
+        continue;
+
+      if (own_modifier && regs->start[group] == regs->end[group])
+        {
+          /* The register is empty: keep the modifier for later.
+             e.g. in s/()([a-z])/\u\1\2/
+             the \u modifier is applied to \2, not \1 */
+          repl_mod = how & REPL_MODIFIERS;
+        }
+      else
+        {
+          str_append_modified(buf, line.active + regs->start[group],
+                              CAST(size_t)(regs->end[group] - regs->start[group]),
+                              how);
+        }
+    }
+
+  return repl_mod;
+}
+
+static void do_subst P_((struct subst *));
+
+/* Execute the `s' command described by SUB on the pattern space
+   (the global `line').
+
+   The result is accumulated in s_accum and then exchanged with `line'.
+   When a replacement was made, the global `replaced' flag is set and
+   the post-processing selected by SUB is run: printing (bit 1 of
+   sub->print before evaluation, bit 2 after), evaluation of the
+   pattern space as a shell command (sub->eval) and writing to
+   sub->outf.  Nothing is post-processed when fewer than sub->numb
+   matches were found.  */
+static void
+do_subst(sub)
+  struct subst *sub;
+{
+  size_t start = 0;     /* where to start scan for (next) match in LINE */
+  size_t last_end = 0;  /* where did the last successful match end in LINE */
+  countT count = 0;     /* number of matches found */
+  bool again = true;
+
+#define MAX_BACKREFERENCES 10
+  static struct re_registers regs;
+
+  if (s_accum.alloc == 0)
+    line_init(&s_accum, INITIAL_BUFFER_SIZE);
+  s_accum.length = 0;
+
+  /* The first part of the loop optimizes s/xxx// when xxx is at the
+     start, and s/xxx$// */
+  if (!match_regex(sub->regx, line.active, line.length, start,
+                   &regs, MAX_BACKREFERENCES))
+    return;
+
+  if (!sub->replacement && sub->numb <= 1)
+    {
+      if (regs.start[0] == 0 && !sub->global)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          /* Drop the matched prefix by advancing the active window.  */
+          line.active += regs.end[0];
+          line.length -= regs.end[0];
+          line.alloc -= regs.end[0];
+          goto post_subst;
+        }
+      else if (regs.end[0] == line.length)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          /* Drop the matched suffix by shortening the line.  */
+          line.length = regs.start[0];
+          goto post_subst;
+        }
+    }
+
+  do
+    {
+      enum replacement_types repl_mod = 0;
+
+      size_t offset = regs.start[0];
+      size_t matched = regs.end[0] - regs.start[0];
+
+      /* Copy stuff to the left of this match into the output string.
+         Everything before OFFSET is consumed from here on, so move
+         START up to it.  The empty-match handling below relies on
+         this: testing a stale START made an unreplaced empty match at
+         the very end of the line (e.g. `s/$/X/2') copy one byte from
+         beyond line.length.  */
+      if (start < offset)
+        {
+          str_append(&s_accum, line.active + start, offset - start);
+          start = offset;
+        }
+
+      /* If we're counting up to the Nth match, are we there yet?
+         And even if we are there, there is another case we have to
+         skip: are we matching an empty string immediately following
+         another match?
+
+         This latter case avoids that baaaac, when passed through
+         s,a*,x,g, gives `xbxxcx' instead of xbxcx.  This behavior is
+         unacceptable because it is not consistently applied (for
+         example, `baaaa' gives `xbx', not `xbxx'). */
+      if ((matched > 0 || count == 0 || offset > last_end)
+          && ++count >= sub->numb)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          /* Now expand the replacement string into the output string. */
+          repl_mod = append_replacement (&s_accum, sub->replacement, &regs, repl_mod);
+          again = sub->global;
+        }
+      else
+        {
+          /* The match was not replaced.  Copy the text until its
+             end; if it was vacuous, skip over one character and
+             add that character to the output.  At this point
+             START == OFFSET, so the test below guarantees that the
+             skipped character lies inside the line.  */
+          if (matched == 0)
+            {
+              if (start < line.length)
+                matched = 1;
+              else
+                break;
+            }
+
+          str_append(&s_accum, line.active + offset, matched);
+        }
+
+      /* Start after the match.  last_end is the real end of the matched
+         substring, excluding characters that were skipped in case the RE
+         matched the empty string.  */
+      start = offset + matched;
+      last_end = regs.end[0];
+    }
+  while (again
+         && start <= line.length
+         && match_regex(sub->regx, line.active, line.length, start,
+                        &regs, MAX_BACKREFERENCES));
+
+  /* Copy stuff to the right of the last match into the output string. */
+  if (start < line.length)
+    str_append(&s_accum, line.active + start, line.length-start);
+  s_accum.chomped = line.chomped;
+
+  /* Exchange line and s_accum.  This can be much cheaper
+     than copying s_accum.active into line.text (for huge lines). */
+  line_exchange(&line, &s_accum);
+
+  /* Finish up. */
+  if (count < sub->numb)
+    return;
+
+ post_subst:
+  if (sub->print & 1)
+    output_line(line.active, line.length, line.chomped, &output_file);
+
+  if (sub->eval)
+    {
+#ifdef HAVE_POPEN
+      FILE *pipe;
+      s_accum.length = 0;
+
+      /* popen needs a NUL-terminated command.  */
+      str_append (&line, "", 1);
+      pipe = popen(line.active, "r");
+
+      if (pipe != NULL)
+        {
+          char buf[4096];
+          size_t n;
+
+          /* Read until fread delivers nothing more.  Looping on
+             !feof() alone never terminates if the stream enters an
+             error state, because the EOF indicator is then never
+             set.  */
+          while ((n = fread (buf, sizeof(char), sizeof buf, pipe)) > 0)
+            str_append(&s_accum, buf, n);
+
+          pclose (pipe);
+
+          /* The command output becomes the pattern space, minus one
+             trailing newline.  */
+          line_exchange(&line, &s_accum);
+          if (line.length &&
+              line.active[line.length - 1] == '\n')
+            line.length--;
+        }
+      else
+        panic(_("error in subprocess"));
+#else
+      panic(_("option `e' not supported"));
+#endif
+    }
+
+  if (sub->print & 2)
+    output_line(line.active, line.length, line.chomped, &output_file);
+  if (sub->outf)
+    output_line(line.active, line.length, line.chomped, sub->outf);
+}
+
+#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
+/* Used to attempt a simple-minded optimization. */
+
+static countT branches;
+
+static countT count_branches P_((struct vector *));
+
+/* Return the number of commands in PROGRAM that can transfer control
+   (`b', `t', `T' and `{').
+
+   NOTE(review): `{' is counted here, while the matching decrement in
+   execute_program tests for `}' -- confirm which of the two is
+   intended.  */
+static countT
+count_branches(program)
+  struct vector *program;
+{
+  struct sed_cmd *cur_cmd = program->v;
+  countT isn_cnt = program->v_length;
+  countT cnt = 0;
+
+  /* Walk every command.  The pointer must advance together with the
+     countdown; otherwise only the first command is ever examined.  */
+  for (; isn_cnt-- > 0; ++cur_cmd)
+    {
+      switch (cur_cmd->cmd)
+        {
+        case 'b':
+        case 't':
+        case 'T':
+        case '{':
+          ++cnt;
+        }
+    }
+  return cnt;
+}
+
+static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
+
+/* Compact VEC in place by dropping every `#' (no-op) command, and
+   update vec->v_length.  Return the new position of the first
+   surviving command that was at or after CUR_CMD, or a null pointer
+   when the compacted program is empty.  */
+static struct sed_cmd *
+shrink_program(vec, cur_cmd)
+  struct vector *vec;
+  struct sed_cmd *cur_cmd;
+{
+  struct sed_cmd *dst = vec->v;         /* next slot to fill */
+  struct sed_cmd *src = vec->v;         /* next command to examine */
+  struct sed_cmd *stop = vec->v + vec->v_length;
+  countT resume_at;
+
+  /* First the commands before CUR_CMD...  */
+  while (src < cur_cmd)
+    {
+      if (src->cmd != '#')
+        MEMCPY(dst++, src, sizeof *dst);
+      ++src;
+    }
+
+  /* ...whose surviving count is where execution resumes...  */
+  resume_at = dst - vec->v;
+
+  /* ...then the remaining ones.  */
+  while (src < stop)
+    {
+      if (src->cmd != '#')
+        MEMCPY(dst++, src, sizeof *dst);
+      ++src;
+    }
+  vec->v_length = dst - vec->v;
+
+  if (0 < vec->v_length)
+    return vec->v + resume_at;
+  return CAST(struct sed_cmd *)0;
+}
+#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
+
+/* Execute the program `vec' on the current input line.
+   Return exit status if caller should quit, -1 otherwise.
+
+   Commands are examined in order starting at vec->v.  A command is
+   run when match_address_p() disagrees with its addr_bang flag.
+   Jumping commands set cur_cmd themselves and `continue'; every other
+   command falls out of the switch to the increment at the bottom of
+   the loop.  When the end of the program is reached the pattern space
+   is printed, unless no_default_output is set.  */
+static int execute_program P_((struct vector *, struct input *));
+static int
+execute_program(vec, input)
+  struct vector *vec;
+  struct input *input;
+{
+  struct sed_cmd *cur_cmd;
+  struct sed_cmd *end_cmd;
+
+  cur_cmd = vec->v;
+  end_cmd = vec->v + vec->v_length;
+  while (cur_cmd < end_cmd)
+    {
+      if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
+        {
+          switch (cur_cmd->cmd)
+            {
+            case 'a':
+              {
+                /* Queue the text; it is not written here.  */
+                struct append_queue *aq = next_append_slot();
+                aq->text = cur_cmd->x.cmd_txt.text;
+                aq->textlen = cur_cmd->x.cmd_txt.text_length;
+              }
+              break;
+
+            case '{':
+            case 'b':
+              cur_cmd = vec->v + cur_cmd->x.jump_index;
+              continue;
+
+            case '}':
+            case '#':
+            case ':':
+              /* Executing labels and block-ends are easy. */
+              break;
+
+            case 'c':
+              if (cur_cmd->range_state != RANGE_ACTIVE)
+                output_line(cur_cmd->x.cmd_txt.text,
+                            cur_cmd->x.cmd_txt.text_length - 1, true,
+                            &output_file);
+              /* POSIX.2 is silent about c starting a new cycle,
+                 but it seems to be expected (and make sense).  */
+              /* Fall Through */
+            case 'd':
+              return -1;
+
+            case 'D':
+              {
+                char *p = memchr(line.active, '\n', line.length);
+                /* Without an embedded newline, behave like `d'.  */
+                if (!p)
+                  return -1;
+
+                /* Drop everything up to and including the newline.  */
+                ++p;
+                line.alloc -= p - line.active;
+                line.length -= p - line.active;
+                line.active += p - line.active;
+
+                /* reset to start next cycle without reading a new line: */
+                cur_cmd = vec->v;
+                continue;
+              }
+
+            case 'e': {
+#ifdef HAVE_POPEN
+              FILE *pipe;
+              int cmd_length = cur_cmd->x.cmd_txt.text_length;
+              if (s_accum.alloc == 0)
+                line_init(&s_accum, INITIAL_BUFFER_SIZE);
+              s_accum.length = 0;
+
+              if (!cmd_length)
+                {
+                  /* Plain `e': run the pattern space itself, after
+                     NUL-terminating it for popen.  */
+                  str_append (&line, "", 1);
+                  pipe = popen(line.active, "r");
+                }
+              else
+                {
+                  /* `e command': the last byte of the stored text is
+                     overwritten with the NUL terminator.  */
+                  cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
+                  pipe = popen(cur_cmd->x.cmd_txt.text, "r");
+                  output_missing_newline(&output_file);
+                }
+
+              if (pipe != NULL)
+                {
+                  char buf[4096];
+                  size_t n;
+
+                  /* Read until fread delivers nothing more.  Looping
+                     on !feof() alone never terminates if the stream
+                     enters an error state, because the EOF indicator
+                     is then never set.  */
+                  while ((n = fread (buf, sizeof(char), sizeof buf, pipe)) > 0)
+                    {
+                      if (!cmd_length)
+                        str_append(&s_accum, buf, n);
+                      else
+                        ck_fwrite(buf, 1, n, output_file.fp);
+                    }
+
+                  pclose (pipe);
+                  if (!cmd_length)
+                    {
+                      /* Store into pattern space for plain `e' commands */
+                      if (s_accum.length &&
+                          s_accum.active[s_accum.length - 1] == '\n')
+                        s_accum.length--;
+
+                      /* Exchange line and s_accum.  This can be much
+                         cheaper than copying s_accum.active into line.text
+                         (for huge lines). */
+                      line_exchange(&line, &s_accum);
+                    }
+                  else
+                    flush_output(output_file.fp);
+
+                }
+              else
+                panic(_("error in subprocess"));
+#else
+              panic(_("`e' command not supported"));
+#endif
+              break;
+            }
+
+            case 'g':
+              line_copy(&hold, &line);
+              break;
+
+            case 'G':
+              line_append(&hold, &line);
+              break;
+
+            case 'h':
+              line_copy(&line, &hold);
+              break;
+
+            case 'H':
+              line_append(&line, &hold);
+              break;
+
+            case 'i':
+              output_line(cur_cmd->x.cmd_txt.text,
+                          cur_cmd->x.cmd_txt.text_length - 1,
+                          true, &output_file);
+              break;
+
+            case 'l':
+              /* An argument of -1 selects the default line length.  */
+              do_list(cur_cmd->x.int_arg == -1
+                      ? lcmd_out_line_len
+                      : cur_cmd->x.int_arg);
+              break;
+
+            case 'L':
+              output_missing_newline(&output_file);
+              fmt(line.active, line.active + line.length,
+                  cur_cmd->x.int_arg == -1
+                  ? lcmd_out_line_len
+                  : cur_cmd->x.int_arg,
+                  output_file.fp);
+              flush_output(output_file.fp);
+              break;
+
+            case 'n':
+              if (!no_default_output)
+                output_line(line.active, line.length, line.chomped, &output_file);
+              if (test_eof(input) || !read_pattern_space(input, vec, false))
+                return -1;
+              break;
+
+            case 'N':
+              str_append(&line, "\n", 1);
+
+              if (test_eof(input) || !read_pattern_space(input, vec, true))
+                {
+                  /* No next line: take back the newline added above.  */
+                  line.length--;
+                  if (posixicity == POSIXLY_EXTENDED && !no_default_output)
+                    output_line(line.active, line.length, line.chomped,
+                                &output_file);
+                  return -1;
+                }
+              break;
+
+            case 'p':
+              output_line(line.active, line.length, line.chomped, &output_file);
+              break;
+
+            case 'P':
+              {
+                /* Print up to the first embedded newline, or the whole
+                   pattern space when there is none.  */
+                char *p = memchr(line.active, '\n', line.length);
+                output_line(line.active, p ? p - line.active : line.length,
+                            p ? true : line.chomped, &output_file);
+              }
+              break;
+
+            case 'q':
+              if (!no_default_output)
+                output_line(line.active, line.length, line.chomped, &output_file);
+              /* Fall Through */
+
+            case 'Q':
+              /* An argument of -1 means no exit status was given.  */
+              return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
+
+            case 'r':
+              if (cur_cmd->x.fname)
+                {
+                  struct append_queue *aq = next_append_slot();
+                  aq->fname = cur_cmd->x.fname;
+                }
+              break;
+
+            case 'R':
+              if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
+                {
+                  struct append_queue *aq;
+                  size_t buflen;
+                  char *text = NULL;
+                  int result;
+
+                  /* Queue one line of the file; the queue entry is
+                     flagged so that the text gets freed later.  */
+                  result = ck_getline (&text, &buflen, cur_cmd->x.fp);
+                  if (result != EOF)
+                    {
+                      aq = next_append_slot();
+                      aq->free = true;
+                      aq->text = text;
+                      aq->textlen = result;
+                    }
+                }
+              break;
+
+            case 's':
+              do_subst(cur_cmd->x.cmd_subst);
+              break;
+
+            case 't':
+              if (replaced)
+                {
+                  replaced = false;
+                  cur_cmd = vec->v + cur_cmd->x.jump_index;
+                  continue;
+                }
+              break;
+
+            case 'T':
+              if (!replaced)
+                {
+                  cur_cmd = vec->v + cur_cmd->x.jump_index;
+                  continue;
+                }
+              else
+                replaced = false;
+              break;
+
+            case 'w':
+              if (cur_cmd->x.fp)
+                output_line(line.active, line.length,
+                            line.chomped, cur_cmd->x.outf);
+              break;
+
+            case 'W':
+              if (cur_cmd->x.fp)
+                {
+                  char *p = memchr(line.active, '\n', line.length);
+                  output_line(line.active, p ? p - line.active : line.length,
+                              p ? true : line.chomped, cur_cmd->x.outf);
+                }
+              break;
+
+            case 'x':
+              line_exchange(&line, &hold);
+              break;
+
+            case 'y':
+              {
+#ifdef HAVE_MBRTOWC
+                if (mb_cur_max > 1)
+                  {
+                    int idx, prev_idx; /* index in the input line.  */
+                    char **trans;
+                    mbstate_t mbstate;
+                    memset(&mbstate, 0, sizeof(mbstate_t));
+                    for (idx = 0; idx < line.length;)
+                      {
+                        int mbclen, i;
+                        mbclen = MBRLEN (line.active + idx, line.length - idx,
+                                         &mbstate);
+                        /* An invalid sequence, or a truncated multibyte
+                           character.  We treat it as a singlebyte character.
+                        */
+                        if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+                            || mbclen == 0)
+                          mbclen = 1;
+
+                        trans = cur_cmd->x.translatemb;
+                        /* `i' indicate i-th translate pair.  */
+                        for (i = 0; trans[2*i] != NULL; i++)
+                          {
+                            if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
+                              {
+                                bool move_remain_buffer = false;
+                                int trans_len = strlen(trans[2*i+1]);
+
+                                if (mbclen < trans_len)
+                                  {
+                                    int new_len;
+                                    new_len = line.length + 1 + trans_len - mbclen;
+                                    /* We must extend the line buffer.  */
+                                    if (line.alloc < new_len)
+                                      {
+                                        /* And we must resize the buffer.  */
+                                        resize_line(&line, new_len);
+                                      }
+                                    move_remain_buffer = true;
+                                  }
+                                else if (mbclen > trans_len)
+                                  {
+                                    /* We must truncate the line buffer.  */
+                                    move_remain_buffer = true;
+                                  }
+                                prev_idx = idx;
+                                if (move_remain_buffer)
+                                  {
+                                    int move_len, move_offset;
+                                    char *move_from, *move_to;
+                                    /* Move the remaining with \0.  */
+                                    move_from = line.active + idx + mbclen;
+                                    move_to = line.active + idx + trans_len;
+                                    move_len = line.length + 1 - idx - mbclen;
+                                    move_offset = trans_len - mbclen;
+                                    memmove(move_to, move_from, move_len);
+                                    line.length += move_offset;
+                                    idx += move_offset;
+                                  }
+                                strncpy(line.active + prev_idx, trans[2*i+1],
+                                        trans_len);
+                                break;
+                              }
+                          }
+                        idx += mbclen;
+                      }
+                  }
+                else
+#endif /* HAVE_MBRTOWC */
+                  {
+                    /* Single-byte case: a plain table lookup per byte.  */
+                    unsigned char *p, *e;
+                    p = CAST(unsigned char *)line.active;
+                    for (e=p+line.length; p<e; ++p)
+                      *p = cur_cmd->x.translate[*p];
+                  }
+              }
+              break;
+
+            case '=':
+              output_missing_newline(&output_file);
+              fprintf(output_file.fp, "%lu\n",
+                      CAST(unsigned long)input->line_number);
+              flush_output(output_file.fp);
+              break;
+
+            default:
+              panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
+            }
+        }
+
+#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
+      /* If our top-level program consists solely of commands with
+         ADDR_IS_NUM addresses then once we past the last mentioned
+         line we should be able to quit if no_default_output is true,
+         or otherwise quickly copy input to output.  Now whether this
+         optimization is a win or not depends on how cheaply we can
+         implement this for the cases where it doesn't help, as
+         compared against how much time is saved.  One semantic
+         difference (which I think is an improvement) is that *this*
+         version will terminate after printing line two in the script
+         "yes | sed -n 2p".
+
+         Don't use this when in-place editing is active, because line
+         numbers restart each time then. */
+      else if (!separate_files)
+        {
+          if (cur_cmd->a1->addr_type == ADDR_IS_NUM
+              && (cur_cmd->a2
+                  ? cur_cmd->range_state == RANGE_CLOSED
+                  : cur_cmd->a1->addr_number < input->line_number))
+            {
+              /* Skip this address next time */
+              cur_cmd->addr_bang = !cur_cmd->addr_bang;
+              cur_cmd->a1->addr_type = ADDR_IS_NULL;
+              if (cur_cmd->a2)
+                cur_cmd->a2->addr_type = ADDR_IS_NULL;
+
+              /* can we make an optimization? */
+              if (cur_cmd->addr_bang)
+                {
+                  if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
+                      || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
+                    branches--;
+
+                  cur_cmd->cmd = '#';   /* replace with no-op */
+                  if (branches == 0)
+                    cur_cmd = shrink_program(vec, cur_cmd);
+                  if (!cur_cmd && no_default_output)
+                    return 0;
+                  end_cmd = vec->v + vec->v_length;
+                  if (!cur_cmd)
+                    cur_cmd = end_cmd;
+                  continue;
+                }
+            }
+        }
+#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
+
+      /* this is buried down here so that a "continue" statement can skip it */
+      ++cur_cmd;
+    }
+
+  if (!no_default_output)
+    output_line(line.active, line.length, line.chomped, &output_file);
+  return -1;
+}
+
+
+
+/* Apply the compiled script THE_PROGRAM to every file named in ARGV,
+   or to standard input ("-") when ARGV is null or empty.  Return the
+   exit status: the value requested by the script if it asked to quit,
+   2 if the input reported any bad file, and EXIT_SUCCESS otherwise.  */
+int
+process_files(the_program, argv)
+  struct vector *the_program;
+  char **argv;
+{
+  static char dash[] = "-";
+  static char *stdin_argv[2] = { dash, NULL };
+  struct input input;
+  int status = EXIT_SUCCESS;
+
+  line_init(&line, INITIAL_BUFFER_SIZE);
+  line_init(&hold, 0);
+  line_init(&buffer, 0);
+
+#ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
+  branches = count_branches(the_program);
+#endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
+
+  /* Set up the input state; nothing is open yet, so the initial
+     reader always fails.  */
+  input.file_list = (argv && *argv) ? argv : stdin_argv;
+  input.reset_at_next_file = true;
+  input.bad_count = 0;
+  input.line_number = 0;
+  input.read_fn = read_always_fail;
+  input.fp = NULL;
+
+  /* One cycle per pattern space, until the input is exhausted or the
+     script asks to quit (any result other than -1).  */
+  while (read_pattern_space(&input, the_program, false))
+    {
+      int rc = execute_program(the_program, &input);
+      if (rc != -1)
+        {
+          status = rc;
+          break;
+        }
+    }
+  closedown(&input);
+
+#ifdef DEBUG_LEAKS
+  /* We're about to exit, so these free()s are redundant.
+     But if we're running under a memory-leak detecting
+     implementation of malloc(), we want to explicitly
+     deallocate in order to avoid extraneous noise from
+     the allocator. */
+  release_append_queue();
+  FREE(buffer.text);
+  FREE(hold.text);
+  FREE(line.text);
+  FREE(s_accum.text);
+#endif /*DEBUG_LEAKS*/
+
+  return input.bad_count ? 2 : status;
+}
diff --git a/sed/fmt.c b/sed/fmt.c
new file mode 100644
index 0000000..693b523
--- /dev/null
+++ b/sed/fmt.c
@@ -0,0 +1,587 @@
+/* `L' command implementation for GNU sed, based on GNU fmt 1.22.
+ Copyright (C) 1994, 1995, 1996, 2002, 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* GNU fmt was written by Ross Paterson <rap@doc.ic.ac.uk>. */
+
+#include "sed.h"
+
+#include <stdio.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#ifndef UINT_MAX
+# define UINT_MAX ((unsigned int) ~(unsigned int) 0)
+#endif
+
+#ifndef INT_MAX
+# define INT_MAX ((int) (UINT_MAX >> 1))
+#endif
+
+/* The following parameters represent the program's idea of what is
+ "best". Adjust to taste, subject to the caveats given. */
+
+/* Prefer lines to be LEEWAY % shorter than the maximum width, giving
+ room for optimization. */
+#define LEEWAY 7
+
+/* Costs and bonuses are expressed as the equivalent departure from the
+ optimal line length, multiplied by 10. e.g. assigning something a
+ cost of 50 means that it is as bad as a line 5 characters too short
+ or too long. The definition of SHORT_COST(n) should not be changed.
+ However, EQUIV(n) may need tuning. */
+
+typedef long COST;
+
+#define MAXCOST (~(((unsigned long) 1) << (8 * sizeof (COST) -1)))
+
+#define SQR(n) ((n) * (n))
+#define EQUIV(n) SQR ((COST) (n))
+
+/* Cost of a filled line n chars longer or shorter than best_width. */
+#define SHORT_COST(n) EQUIV ((n) * 10)
+
+/* Cost of the difference between adjacent filled lines. */
+#define RAGGED_COST(n) (SHORT_COST (n) / 2)
+
+/* Basic cost per line. */
+#define LINE_COST EQUIV (70)
+
+/* Cost of breaking a line after the first word of a sentence, where
+ the length of the word is N. */
+#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
+
+/* Cost of breaking a line before the last word of a sentence, where
+ the length of the word is N. */
+#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
+
+/* Bonus for breaking a line at the end of a sentence. */
+#define SENTENCE_BONUS EQUIV (50)
+
+/* Cost of breaking a line after a period not marking end of a sentence.
+ With the definition of sentence we are using (borrowed from emacs, see
+ get_line()) such a break would then look like a sentence break. Hence
+ we assign a very high cost -- it should be avoided unless things are
+ really bad. */
+#define NOBREAK_COST EQUIV (600)
+
+/* Bonus for breaking a line before open parenthesis. */
+#define PAREN_BONUS EQUIV (40)
+
+/* Bonus for breaking a line after other punctuation. */
+#define PUNCT_BONUS EQUIV(40)
+
+/* Credit for breaking a long paragraph one line later. */
+#define LINE_CREDIT EQUIV(3)
+
+/* Size of paragraph buffer in words. Longer paragraphs are handled
+ neatly (cf. flush_paragraph()), so there's little to gain by making
+ these larger. */
+#define MAXWORDS 1000
+
+#define GETC() (parabuf == end_of_parabuf ? EOF : *parabuf++)
+
+/* Extra ctype(3)-style macros. */
+
+#define isopen(c) (strchr ("([`'\"", (c)) != NULL)
+#define isclose(c) (strchr (")]'\"", (c)) != NULL)
+#define isperiod(c) (strchr (".?!", (c)) != NULL)
+
+/* Size of a tab stop, for expansion on input and re-introduction on
+ output. */
+#define TABWIDTH 8
+
+/* Word descriptor structure. */
+
+typedef struct Word WORD;
+
+struct Word
+ {
+
+ /* Static attributes determined during input. */
+
+ const char *text; /* start of the word inside the input buffer; LENGTH bytes, not NUL-terminated */
+ short length; /* length of this word, in bytes */
+ short space; /* the size (in columns) of the following space */
+ unsigned paren:1; /* starts with open paren */
+ unsigned period:1; /* ends in [.?!])* */
+ unsigned punct:1; /* ends in punctuation */
+ unsigned final:1; /* end of sentence */
+
+ /* The remaining fields are computed during the optimization
+ (see fmt_paragraph). */
+
+ short line_length; /* length of the best line starting here */
+ COST best_cost; /* cost of best paragraph starting here */
+ WORD *next_break; /* break which achieves best_cost */
+ };
+
+/* Forward declarations. */
+
+static bool get_paragraph P_ ((void));
+static int get_line P_ ((int c));
+static int get_space P_ ((int c));
+static int copy_rest P_ ((int c));
+static bool same_para P_ ((int c));
+static void flush_paragraph P_ ((void));
+static void fmt_paragraph P_ ((void));
+static void check_punctuation P_ ((WORD *w));
+static COST base_cost P_ ((WORD *this));
+static COST line_cost P_ ((WORD *next, int len));
+static void put_paragraph P_ ((WORD *finish));
+static void put_line P_ ((WORD *w, int indent));
+static void put_word P_ ((WORD *w));
+static void put_space P_ ((int space));
+
+/* Option values. */
+
+/* User-supplied maximum line width (default WIDTH). The only output
+ lines longer than this will each comprise a single word. */
+static int max_width;
+
+/* Space for the paragraph text. */
+static const char *parabuf;
+
+/* End of space for the paragraph text. */
+static const char *end_of_parabuf;
+
+/* The file on which we output */
+static FILE *outfile;
+
+/* Values derived from the option values. */
+
+/* The preferred width of text lines, set to LEEWAY % less than max_width. */
+static int best_width;
+
+/* Dynamic variables. */
+
+/* Start column of the character most recently read from the input file. */
+static int in_column;
+
+/* Start column of the next character to be written to stdout. */
+static int out_column;
+
+/* The words of a paragraph -- longer paragraphs are handled neatly
+ (cf. flush_paragraph()). */
+static WORD words[MAXWORDS];
+
+/* A pointer into the above word array, indicating the first position
+ after the last complete word. Sometimes it will point at an incomplete
+ word. */
+static WORD *word_limit;
+
+/* Indentation of the first line of the current paragraph. */
+static int first_indent;
+
+/* Indentation of other lines of the current paragraph */
+static int other_indent;
+
+/* The last character read from the input file. */
+static int next_char;
+
+/* If nonzero, the length of the last line output in the current
+ paragraph, used to charge for raggedness at the split point for long
+ paragraphs chosen by fmt_paragraph(). */
+static int last_line_length;
+
+/* Format the text in [LINE, LINE_END) into paragraphs whose lines are
+   at most MAX_LENGTH columns wide where possible, and write the
+   result to OUTPUT_FILE.  */
+
+void
+fmt (const char *line, const char *line_end, int max_length, FILE *output_file)
+{
+  /* Where to write, and the window of input to read from.  */
+  outfile = output_file;
+  parabuf = line;
+  end_of_parabuf = line_end;
+
+  /* Aim for lines a little shorter than the hard limit.  */
+  max_width = max_length;
+  best_width = max_length * (201 - 2 * LEEWAY) / 200;
+
+  other_indent = 0;
+  in_column = 0;
+  next_char = GETC();
+
+  for (;;)
+    {
+      if (!get_paragraph ())
+        break;
+      fmt_paragraph ();
+      put_paragraph (word_limit);
+    }
+}
+
+/* Read the next paragraph from the input buffer into the `words'
+   array.  Blank lines in front of it are copied to the output.  A
+   paragraph extends over consecutive non-blank lines; the lines after
+   the first must all share the same indent.
+
+   Return false if the end of the input was reached before a paragraph
+   started, else true.  */
+
+static bool
+get_paragraph ()
+{
+  register int ch = next_char;
+  WORD *last;
+
+  last_line_length = 0;
+
+  /* Scan (and copy) blank lines.  */
+  for (;;)
+    {
+      if (ch != '\n' && ch != EOF)
+        break;
+
+      ch = copy_rest (ch);
+      if (ch == EOF)
+        {
+          next_char = EOF;
+          return false;
+        }
+      putc ('\n', outfile);
+      ch = GETC();
+    }
+
+  /* Got a suitable first line for a paragraph.  */
+  first_indent = in_column;
+  word_limit = words;
+  ch = get_line (ch);
+
+  /* Read rest of paragraph.  */
+  other_indent = in_column;
+  while (same_para (ch) && in_column == other_indent)
+    ch = get_line (ch);
+
+  /* The last word of a paragraph always ends a sentence.  */
+  last = word_limit - 1;
+  last->final = true;
+  last->period = true;
+
+  next_char = ch;
+  return true;
+}
+
+/* Copy the rest of the current line to the output, starting with the
+   character C, and reset the output column.  Nothing is copied when C
+   is already \n or EOF.  The terminating character is not written.
+   Return the character (\n or EOF) ending the line.  */
+
+static int
+copy_rest (register int c)
+{
+  out_column = 0;
+  for (;;)
+    {
+      if (c == '\n' || c == EOF)
+        return c;
+      putc (c, outfile);
+      c = GETC();
+    }
+}
+
+/* Return true if a line whose first non-blank character is C could
+   belong to the current paragraph, that is, if the line is neither
+   blank nor the end of the input; otherwise false.  */
+
+static bool
+same_para (register int c)
+{
+  return !(c == '\n' || c == EOF);
+}
+
+/* Read a line from the input data, given its first non-blank
+   character C, and break it into words appended at word_limit.
+   A word is a maximal non-empty string of non-white characters.  A word
+   ending in [.?!]["')\]]* and followed by end-of-line or at least two
+   spaces ends a sentence, as in emacs.
+
+   Return the first non-blank character of the next line, or EOF.  */
+
+static int
+get_line (register int c)
+{
+  /* When this slot has been filled the array is nearly full and part
+     of the paragraph must be flushed.  */
+  WORD *const flush_at = &words[MAXWORDS - 2];
+
+  for (;;)
+    {                           /* for each word in a line */
+      WORD *w = word_limit;
+      int col_before_space;
+
+      /* Scan word.  C has already been consumed, hence the -1.  */
+      w->text = parabuf - 1;
+      do
+        c = GETC();
+      while (c != EOF && !ISSPACE (c));
+      w->length = parabuf - w->text - (c != EOF);
+      in_column += w->length;
+
+      check_punctuation (w);
+
+      /* Scan inter-word space.  */
+      col_before_space = in_column;
+      c = get_space (c);
+      w->space = in_column - col_before_space;
+
+      if (c == EOF)
+        w->final = true;
+      else if (w->period && (c == '\n' || w->space > 1))
+        w->final = true;
+      else
+        w->final = false;
+
+      /* At the end of a line the space is normalized: two columns
+         after a sentence, one otherwise.  */
+      if (c == '\n' || c == EOF)
+        w->space = w->final ? 2 : 1;
+
+      /* flush_paragraph moves word_limit, so advance it afterwards.  */
+      if (w == flush_at)
+        flush_paragraph ();
+      word_limit++;
+
+      if (c == EOF)
+        {
+          in_column = first_indent;
+          return EOF;
+        }
+      if (c == '\n')
+        break;
+    }
+
+  in_column = 0;
+  return get_space (GETC());
+}
+
+/* Skip over spaces and tabs in the input data, starting with C, and
+   keep in_column up-to-date: a space advances one column, a tab
+   advances to the next multiple of TABWIDTH.  Return the first
+   character that is neither.  */
+
+static int
+get_space (register int c)
+{
+  while (c == ' ' || c == '\t')
+    {
+      if (c == ' ')
+        in_column++;
+      else
+        in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
+      c = GETC();
+    }
+  return c;
+}
+
+/* Set the fields of word W that describe attached punctuation:
+   `paren' if it starts with an opening character, `punct' if its last
+   character is punctuation, and `period' if, ignoring any trailing
+   closing characters, it ends in one of .?!  */
+
+static void
+check_punctuation (register WORD *w)
+{
+  register const char *first = w->text;
+  register const char *last = first + (w->length - 1);
+
+  w->paren = isopen (*first);
+  w->punct = ISPUNCT (*last);
+
+  /* Step back over closing quotes and brackets, but never past the
+     first character of the word.  */
+  for (; last > first && isclose (*last); last--)
+    continue;
+
+  w->period = isperiod (*last);
+}
+
+/* Flush part of the paragraph to make room in the `words' array.
+   This function is called by get_line when the array is nearly full.
+   The words read so far are formatted, the lines up to a cheap break
+   are written out, and the remaining words are moved to the front of
+   the array.  */
+
+static void
+flush_paragraph (void)
+{
+  WORD *split_point = word_limit;
+  COST best_break = MAXCOST;
+  register WORD *w;
+
+  /* - format what you have so far as a paragraph,
+     - find a low-cost line break near the end,
+     - output to there,
+     - make that the start of the paragraph.  */
+
+  fmt_paragraph ();
+
+  /* Choose a good split point: walk the chain of chosen line breaks
+     and keep the one whose line is cheapest.  The threshold grows by
+     LINE_CREDIT at every step.  */
+  w = words->next_break;
+  while (w != word_limit)
+    {
+      COST this_line = w->best_cost - w->next_break->best_cost;
+
+      if (this_line < best_break)
+        {
+          split_point = w;
+          best_break = this_line;
+        }
+      if (best_break <= MAXCOST - LINE_CREDIT)
+        best_break += LINE_CREDIT;
+
+      w = w->next_break;
+    }
+  put_paragraph (split_point);
+
+  /* Copy words from split_point down to word -- we use memmove because
+     the source and target may overlap.  The slot at word_limit is
+     moved as well, hence the + 1.  */
+
+  memmove ((char *) words, (char *) split_point,
+           (word_limit - split_point + 1) * sizeof (WORD));
+  word_limit -= split_point - words;
+}
+
+/* Compute the optimal formatting for the whole paragraph by computing
+   and remembering the optimal formatting for each suffix from the empty
+   one to the whole paragraph.  For every word the best following
+   break, the length of the line starting there and the total cost are
+   stored in the word itself.  */
+
+static void
+fmt_paragraph (void)
+{
+  register WORD *start, *w;
+  register int len;
+  register COST wcost, best;
+  int saved_length;
+
+  /* The slot just past the last word acts as a sentinel: it costs
+     nothing and is wide enough to stop the scan of any line.  */
+  word_limit->best_cost = 0;
+  saved_length = word_limit->length;
+  word_limit->length = max_width;
+
+  start = word_limit;
+  while (start > words)
+    {
+      int is_first;
+
+      start--;
+      is_first = (start == words);
+      best = MAXCOST;
+
+      /* At least one word, however long, in the line.  */
+      len = (is_first ? first_indent : other_indent) + start->length;
+
+      for (w = start + 1; ; w++)
+        {
+          /* Consider breaking before w.  */
+
+          wcost = line_cost (w, len) + w->best_cost;
+          if (is_first && last_line_length > 0)
+            wcost += RAGGED_COST (len - last_line_length);
+          if (wcost < best)
+            {
+              best = wcost;
+              start->next_break = w;
+              start->line_length = len;
+            }
+
+          /* Extend the candidate line by one more word.  */
+          len += (w - 1)->space + w->length;    /* w > start >= words */
+          if (len >= max_width)
+            break;
+        }
+      start->best_cost = best + base_cost (start);
+    }
+
+  word_limit->length = saved_length;
+}
+
+/* Return the constant component of the cost of breaking before the
+   word THIS, i.e. the part that does not depend on the length of the
+   line.  It reflects the punctuation of the preceding word and the
+   nature of THIS itself.  */
+
+static COST
+base_cost (register WORD *this)
+{
+  register COST cost = LINE_COST;
+
+  if (this > words)
+    {
+      WORD *prev = this - 1;
+
+      if (prev->period)
+        {
+          /* Breaking at a real sentence end is good; after any other
+             period it is very bad.  */
+          if (prev->final)
+            cost -= SENTENCE_BONUS;
+          else
+            cost += NOBREAK_COST;
+        }
+      else if (prev->punct)
+        {
+          cost -= PUNCT_BONUS;
+        }
+      else if (this > words + 1 && (this - 2)->final)
+        {
+          /* PREV would be left alone as the first word of a sentence.  */
+          cost += WIDOW_COST (prev->length);
+        }
+    }
+
+  if (this->paren)
+    cost -= PAREN_BONUS;
+  else if (this->final)
+    cost += ORPHAN_COST (this->length);
+
+  return cost;
+}
+
+/* Return the component of the cost of breaking before word NEXT that
+   depends on LEN, the length of the line ending there.  The last line
+   of a paragraph is free; otherwise the cost grows with the distance
+   from best_width and, unless the line starting at NEXT is the last
+   one, with the difference in length from that line.  */
+
+static COST
+line_cost (register WORD *next, register int len)
+{
+  register int slack;
+  register COST cost;
+
+  if (next == word_limit)
+    return 0;
+
+  slack = best_width - len;
+  cost = SHORT_COST (slack);
+
+  if (next->next_break != word_limit)
+    {
+      register int ragged = len - next->line_length;
+      cost += RAGGED_COST (ragged);
+    }
+  return cost;
+}
+
+/* Write to the output file the lines of the paragraph from the first
+   word up to (but not including) FINISH, which must be in the
+   next_break chain starting at the first word.  The first line uses
+   first_indent, the others other_indent.  */
+
+static void
+put_paragraph (register WORD *finish)
+{
+  register WORD *line_start;
+
+  put_line (words, first_indent);
+
+  line_start = words->next_break;
+  while (line_start != finish)
+    {
+      put_line (line_start, other_indent);
+      line_start = line_start->next_break;
+    }
+}
+
+/* Write to the output file the line beginning with word W, indented
+   by INDENT columns and running up to W's next_break.  Words are
+   separated by their recorded spacing, no space follows the last
+   word, and the resulting width is remembered in last_line_length.  */
+
+static void
+put_line (register WORD *w, int indent)
+{
+  register WORD *last_word = w->next_break - 1;
+
+  out_column = 0;
+  put_space (indent);
+
+  while (w != last_word)
+    {
+      put_word (w);
+      put_space (w->space);
+      w++;
+    }
+  put_word (w);
+
+  last_line_length = out_column;
+  putc ('\n', outfile);
+}
+
+/* Write the LENGTH bytes of word W to the output file and advance
+   out_column accordingly.  */
+
+static void
+put_word (register WORD *w)
+{
+  register const char *s = w->text;
+  register int n = w->length;
+
+  while (n != 0)
+    {
+      putc (*s++, outfile);
+      n--;
+    }
+  out_column += w->length;
+}
+
+/* Write SPACE blanks to the output file and advance out_column by the
+   same amount.  (Only spaces are written; tabs are never
+   re-introduced.)  */
+
+static void
+put_space (int space)
+{
+  out_column += space;
+  for (; space != 0; space--)
+    putc (' ', outfile);
+}
diff --git a/sed/mbcs.c b/sed/mbcs.c
new file mode 100644
index 0000000..fe471f2
--- /dev/null
+++ b/sed/mbcs.c
@@ -0,0 +1,56 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "sed.h"
+#include <stdlib.h>
+
+int mb_cur_max;
+
+#ifdef HAVE_MBRTOWC
+/* Feed the single byte CH to the multibyte conversion state CUR_STAT.
+
+   Returns what mbrtowc() reports for that byte: the length of the
+   character when one has just been completed (0 for a NUL byte), or -2
+   when more bytes are still needed.  An invalid sequence resets CUR_STAT
+   to the initial state and is reported as a one-byte character.  */
+int brlen (ch, cur_stat)
+  int ch;
+  mbstate_t *cur_stat;
+{
+  char c = ch;
+
+  /* Keep the result as size_t: the error markers are (size_t) -1 and
+     (size_t) -2, and narrowing them to int before comparing relies on
+     implementation-defined behaviour.  */
+  size_t result = mbrtowc (NULL, &c, 1, cur_stat);
+
+  /* An invalid sequence is treated like a singlebyte character. */
+  if (result == (size_t) -1)
+    {
+      memset (cur_stat, 0, sizeof (mbstate_t));
+      return 1;
+    }
+
+  /* Incomplete character: more bytes are needed.  */
+  if (result == (size_t) -2)
+    return -2;
+
+  return (int) result;
+}
+#endif
+
+/* Set the global mb_cur_max: MB_CUR_MAX when mbrtowc() is available
+   (HAVE_MBRTOWC), otherwise 1 so that every byte counts as one character.
+   main() calls this right after setlocale().  */
+void
+initialize_mbcs ()
+{
+#ifndef HAVE_MBRTOWC
+  mb_cur_max = 1;
+#else
+  mb_cur_max = MB_CUR_MAX;
+#endif
+}
+
diff --git a/sed/regexp.c b/sed/regexp.c
new file mode 100644
index 0000000..228bf07
--- /dev/null
+++ b/sed/regexp.c
@@ -0,0 +1,238 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1999, 2002, 2003 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "sed.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#ifdef gettext_noop
+# define N_(String) gettext_noop(String)
+#else
+# define N_(String) (String)
+#endif
+
+extern bool use_extended_syntax_p;
+
+/* The two error messages of this file, packed into a single array and
+   separated by an embedded NUL.  The macros below give the start of each
+   message inside the array.  */
+static const char errors[] =
+ "no previous regular expression\0"
+ "cannot specify modifiers on empty regexp";
+
+/* First message: starts at the beginning of the array.  */
+#define NO_REGEX (errors)
+/* Second message: skip the first one; sizeof of the literal includes its
+   terminating NUL.  NOTE(review): the N_() wrappers presumably exist so
+   that message extraction sees both strings -- confirm.  */
+#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression")))
+/* One past the second message's terminating NUL.  */
+#define END_ERRORS (BAD_MODIF + sizeof(N_("cannot specify modifiers on empty regexp")))
+
+
+
+/* Compile the regular expression stored in buffer B.
+
+   FLAGS holds REG_* compilation flags (REG_ICASE and REG_NEWLINE are
+   examined here).  NEEDED_SUB is the highest subexpression number the
+   caller refers to; when it is zero and REG_PERL is in use, the pattern
+   is compiled with REG_NOSUB.
+
+   Returns a newly allocated regex_t, or NULL when B is empty (an empty
+   regexp stands for the last one used, see match_regex).  Modifiers on
+   an empty regexp, compilation errors and -- with GNU extensions enabled --
+   a NEEDED_SUB larger than the number of groups in the pattern are all
+   reported through bad_prog().  */
+regex_t *
+compile_regex(b, flags, needed_sub)
+  struct buffer *b;
+  int flags;
+  int needed_sub;
+{
+  regex_t *new_regex;
+
+  char *last_re = NULL;
+  size_t last_re_len;
+
+  /* // matches the last RE */
+  if (size_buffer(b) == 0)
+    {
+      if (flags > 0)
+        bad_prog(_(BAD_MODIF));
+      return NULL;
+    }
+
+  /* Work on a private copy: normalize_text() below edits it in place.  */
+  last_re_len = size_buffer(b);
+  last_re = ck_memdup(get_buffer(b), last_re_len);
+
+  new_regex = MALLOC(1, regex_t);
+
+#ifdef REG_PERL
+  {
+    int errcode;
+    errcode = regncomp(new_regex, last_re, last_re_len,
+                       (needed_sub ? 0 : REG_NOSUB)
+                       | flags
+                       | extended_regexp_flags);
+
+    if (errcode)
+      {
+        char errorbuf[200];
+        regerror(errcode, NULL, errorbuf, 200);
+        bad_prog(gettext(errorbuf));
+      }
+  }
+#else
+  new_regex->fastmap = malloc (1 << (sizeof (char) * 8));
+  {
+    const char *error;
+    int syntax = ((extended_regexp_flags & REG_EXTENDED)
+                  ? RE_SYNTAX_POSIX_EXTENDED
+                  : RE_SYNTAX_POSIX_BASIC)
+                 & ~RE_UNMATCHED_RIGHT_PAREN_ORD;
+
+    syntax |= RE_NO_POSIX_BACKTRACKING;
+#ifdef RE_ICASE
+    syntax |= (flags & REG_ICASE) ? RE_ICASE : 0;
+#endif
+
+    /* If REG_NEWLINE is set, newlines are treated differently. */
+    if (flags & REG_NEWLINE)
+      { /* REG_NEWLINE implies neither . nor [^...] match newline. */
+        syntax &= ~RE_DOT_NEWLINE;
+        syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      }
+
+    /* The translate table is read by the pattern compiler, so it has to
+       be installed before re_compile_pattern() runs; setting it only
+       afterwards leaves the pattern itself unfolded.  */
+    new_regex->translate = NULL;
+#ifndef RE_ICASE
+    if (flags & REG_ICASE)
+      {
+        /* No RE_ICASE syntax bit: fold case through a lower-casing table.  */
+        static char translate[1 << (sizeof(char) * 8)];
+        int i;
+        for (i = 0; i < sizeof(translate) / sizeof(char); i++)
+          translate[i] = tolower (i);
+
+        new_regex->translate = translate;
+      }
+#endif
+
+    /* GNU regex does not process \t & co. */
+    last_re_len = normalize_text(last_re, last_re_len, TEXT_REGEX);
+    re_set_syntax (syntax);
+    error = re_compile_pattern (last_re, last_re_len, new_regex);
+    new_regex->newline_anchor = (flags & REG_NEWLINE) != 0;
+
+    if (error)
+      bad_prog(error);
+  }
+#endif
+
+  FREE(last_re);
+
+  /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
+  if (new_regex->re_nsub < needed_sub && posixicity == POSIXLY_EXTENDED)
+    {
+      char buf[200];
+      sprintf(buf, _("invalid reference \\%d on `s' command's RHS"),
+              needed_sub);
+      bad_prog(buf);
+    }
+
+  return new_regex;
+}
+
+#ifdef REG_PERL
+/* Transfer NREGS match offsets from the regmatch_t array PMATCH into the
+   register structure REGS.  REGS's start/end arrays are allocated when
+   missing and enlarged when they hold fewer than NREGS + 1 slots; every
+   slot from NREGS up to REGS->num_regs is set to -1.  */
+static void
+copy_regs (regs, pmatch, nregs)
+  struct re_registers *regs;
+  regmatch_t *pmatch;
+  int nregs;
+{
+  /* One slot more than NREGS, for the `-1' marker GNU code uses.  */
+  int wanted = nregs + 1;
+  int k;
+
+  if (regs->start == NULL)
+    {
+      /* Nothing allocated yet.  */
+      regs->start = MALLOC (wanted, regoff_t);
+      regs->end = MALLOC (wanted, regoff_t);
+      regs->num_regs = wanted;
+    }
+  else if (wanted > regs->num_regs)
+    {
+      /* Allocated, but too small for this match.  */
+      regs->start = REALLOC (regs->start, wanted, regoff_t);
+      regs->end = REALLOC (regs->end, wanted, regoff_t);
+      regs->num_regs = wanted;
+    }
+
+  /* Real matches first...  */
+  k = 0;
+  while (k < nregs)
+    {
+      regs->start[k] = pmatch[k].rm_so;
+      regs->end[k] = pmatch[k].rm_eo;
+      k++;
+    }
+
+  /* ...then mark the remaining slots as unused.  */
+  while (k < regs->num_regs)
+    {
+      regs->end[k] = -1;
+      regs->start[k] = -1;
+      k++;
+    }
+}
+#endif
+
+/* Search the BUFLEN bytes at BUF for REGEX, starting the search at
+   BUF_START_OFFSET.
+
+   A NULL REGEX means "the regex given on the previous call"; bad_prog()
+   is called when there has been none.  When REGSIZE is nonzero the match
+   registers are stored in REGARRAY.  Returns nonzero if a match was
+   found and zero otherwise.  */
+int
+match_regex(regex, buf, buflen, buf_start_offset, regarray, regsize)
+  regex_t *regex;
+  char *buf;
+  size_t buflen;
+  size_t buf_start_offset;
+  struct re_registers *regarray;
+  int regsize;
+{
+  static regex_t *regex_last;
+  int ret;
+#ifdef REG_PERL
+  /* Ten slots on the stack; larger requests get an alloca'd array.  */
+  regmatch_t rm[10], *regmatch = rm;
+  if (regsize > 10)
+    regmatch = (regmatch_t *) alloca (sizeof (regmatch_t) * regsize);
+#endif
+
+  /* Remember the last regexp matched, or fall back to it.  */
+  if (regex)
+    regex_last = regex;
+  else
+    {
+      if (!regex_last)
+        bad_prog(_(NO_REGEX));
+      regex = regex_last;
+    }
+
+#ifdef REG_PERL
+  /* With REG_STARTEND the search window is passed in through slot 0.  */
+  regmatch[0].rm_so = CAST(int)buf_start_offset;
+  regmatch[0].rm_eo = CAST(int)buflen;
+  ret = regexec (regex, buf, regsize, regmatch, REG_STARTEND);
+
+  if (regsize)
+    copy_regs (regarray, regmatch, regsize);
+
+  return ret == 0;
+#else
+  regex->regs_allocated = REGS_REALLOCATE;
+
+  ret = re_search (regex, buf, buflen, buf_start_offset,
+                   buflen - buf_start_offset,
+                   regsize ? regarray : NULL);
+
+  /* re_search yields the match position, or a negative value.  */
+  return ret >= 0;
+#endif
+}
+
+
+#ifdef DEBUG_LEAKS
+/* Release the compiled pattern held in REGEX with regfree(), then free
+   the regex_t object itself.  Only built when DEBUG_LEAKS is defined.  */
+void
+release_regex(regex)
+ regex_t *regex;
+{
+ regfree(regex);
+ FREE(regex);
+}
+#endif /*DEBUG_LEAKS*/
diff --git a/sed/sed.c b/sed/sed.c
new file mode 100644
index 0000000..90fe368
--- /dev/null
+++ b/sed/sed.c
@@ -0,0 +1,308 @@
+#define COPYRIGHT_NOTICE "Copyright (C) 2003 Free Software Foundation, Inc."
+#define BUG_ADDRESS "bonzini@gnu.org"
+
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#include "sed.h"
+
+
+#include <stdio.h>
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#else
+# include <string.h>
+#endif /*HAVE_STRINGS_H*/
+#ifdef HAVE_MEMORY_H
+# include <memory.h>
+#endif
+
+#ifndef HAVE_STRCHR
+# define strchr index
+# define strrchr rindex
+#endif
+
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_LOCALE_H
+# include <locale.h>
+#endif
+#include "getopt.h"
+
+#ifndef BOOTSTRAP
+#ifndef HAVE_STDLIB_H
+ extern char *getenv P_((const char *));
+#endif
+#endif
+
+/* ATOI converts a numeric string taken from the environment or from an
+   option argument.  With strtoul() the base is 0, so the usual C prefixes
+   (0x.., 0..) are honoured; the atoi() fallback is decimal only.  */
+#ifndef HAVE_STRTOUL
+# define ATOI(x) atoi(x)
+#else
+# define ATOI(x) strtoul(x, NULL, 0)
+#endif
+
+/* Regex dialect selected on the command line: 0 by default, REG_EXTENDED
+   after -r and (when available) REG_PERL after -R.  compile_regex()
+   consults it for every pattern.  */
+int extended_regexp_flags = 0;
+
+/* If set, fflush(stdout) on every line output. */
+bool unbuffered_output = false;
+
+/* If set, don't write out the line unless explicitly told to */
+bool no_default_output = false;
+
+/* If set, reset line counts on every new file. */
+bool separate_files = false;
+
+/* How do we edit files in-place? (we don't if NULL) */
+char *in_place_extension = NULL;
+
+/* Do we need to be pedantically POSIX compliant? */
+enum posixicity_types posixicity;
+
+/* How long should the `l' command's output line be? */
+countT lcmd_out_line_len = 70;
+
+/* The complete compiled SED program that we are going to run: */
+static struct vector *the_program = NULL;
+
+/* Print the command-line help and exit with STATUS.  The text goes to
+   stdout when STATUS is zero and to stderr otherwise.  This function
+   does not return.  */
+static void usage P_((int));
+static void
+usage(status)
+  int status;
+{
+  FILE *out = status ? stderr : stdout;
+
+  /* The -R help line only exists when Perl regexps are compiled in.  */
+#ifdef REG_PERL
+#define PERL_HELP _(" -R, --regexp-perl\n use Perl 5's regular expressions syntax in the script.\n")
+#else
+#define PERL_HELP ""
+#endif
+
+  fprintf(out, _("\
+Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n\
+\n"), myname);
+
+  fprintf(out, _(" -n, --quiet, --silent\n\
+ suppress automatic printing of pattern space\n"));
+  fprintf(out, _(" -e script, --expression=script\n\
+ add the script to the commands to be executed\n"));
+  fprintf(out, _(" -f script-file, --file=script-file\n\
+ add the contents of script-file to the commands to be executed\n"));
+  fprintf(out, _(" -i[SUFFIX], --in-place[=SUFFIX]\n\
+ edit files in place (makes backup if extension supplied)\n"));
+  fprintf(out, _(" -l N, --line-length=N\n\
+ specify the desired line-wrap length for the `l' command\n"));
+  fprintf(out, _(" --posix\n\
+ disable all GNU extensions.\n"));
+  fprintf(out, _(" -r, --regexp-extended\n\
+ use extended regular expressions in the script.\n"));
+  /* PERL_HELP is either empty or a translated string, so it must not be
+     used as a printf format; write it verbatim instead.  */
+  fputs(PERL_HELP, out);
+  fprintf(out, _(" -s, --separate\n\
+ consider files as separate rather than as a single continuous\n\
+ long stream.\n"));
+  fprintf(out, _(" -u, --unbuffered\n\
+ load minimal amounts of data from the input files and flush\n\
+ the output buffers more often\n"));
+  fprintf(out, _(" --help display this help and exit\n"));
+  fprintf(out, _(" --version output version information and exit\n"));
+  fprintf(out, _("\n\
+If no -e, --expression, -f, or --file option is given, then the first\n\
+non-option argument is taken as the sed script to interpret. All\n\
+remaining arguments are names of input files; if no input files are\n\
+specified, then the standard input is read.\n\
+\n"));
+  fprintf(out, _("E-mail bug reports to: %s .\n\
+Be sure to include the word ``%s'' somewhere in the ``Subject:'' field.\n"),
+          BUG_ADDRESS, PACKAGE);
+
+  /* NOTE(review): ck_fclose (NULL) presumably closes every stream the
+     utils layer tracks -- confirm in utils.c.  */
+  ck_fclose (NULL);
+  exit (status);
+}
+
+/* Program entry point: set up the locale and defaults, parse the command
+   line, compile the script, run it over the input files and return the
+   status reported by process_files().  */
+int
+main(argc, argv)
+  int argc;
+  char **argv;
+{
+#ifdef REG_PERL
+#define SHORTOPTS "snrRue:f:l:i::V:"
+#else
+#define SHORTOPTS "snrue:f:l:i::V:"
+#endif
+
+  /* Long options.  Those without a short form map to private letters
+     ('p', 'v', 'h') that do not appear in SHORTOPTS.  */
+  static struct option longopts[] = {
+    {"regexp-extended", 0, NULL, 'r'},
+#ifdef REG_PERL
+    {"regexp-perl", 0, NULL, 'R'},
+#endif
+    {"expression", 1, NULL, 'e'},
+    {"file", 1, NULL, 'f'},
+    {"in-place", 2, NULL, 'i'},
+    {"line-length", 1, NULL, 'l'},
+    {"quiet", 0, NULL, 'n'},
+    {"posix", 0, NULL, 'p'},
+    {"silent", 0, NULL, 'n'},
+    {"separate", 0, NULL, 's'},
+    {"unbuffered", 0, NULL, 'u'},
+    {"version", 0, NULL, 'v'},
+    {"help", 0, NULL, 'h'},
+    {NULL, 0, NULL, 0}
+  };
+
+  int opt;
+  int return_code;
+  const char *cols = getenv("COLS");
+
+  initialize_main (&argc, &argv);
+#if HAVE_SETLOCALE
+  /* Set locale according to user's wishes. */
+  setlocale (LC_ALL, "");
+#endif
+  initialize_mbcs ();
+
+#if ENABLE_NLS
+
+  /* Tell program which translations to use and where to find. */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+#endif
+
+  /* POSIXLY_CORRECT in the environment selects the middle level; --posix
+     below selects the strictest one.  */
+  if (getenv("POSIXLY_CORRECT") != NULL)
+    posixicity = POSIXLY_CORRECT;
+  else
+    posixicity = POSIXLY_EXTENDED;
+
+  /* If environment variable `COLS' is set, use its value for
+     the baseline setting of `lcmd_out_line_len'. The "-1"
+     is to avoid gratuitous auto-line-wrap on ttys.
+   */
+  if (cols)
+    {
+      countT t = ATOI(cols);
+      if (t > 1)
+        lcmd_out_line_len = t-1;
+    }
+
+  myname = *argv;
+  /* getopt_long() signals the end of the options with -1; EOF is only
+     guaranteed to be negative, so test for -1 explicitly.  */
+  while ((opt = getopt_long(argc, argv, SHORTOPTS, longopts, NULL)) != -1)
+    {
+      switch (opt)
+        {
+        case 'n':
+          no_default_output = true;
+          break;
+        case 'e':
+          the_program = compile_string(the_program, optarg, strlen(optarg));
+          break;
+        case 'f':
+          the_program = compile_file(the_program, optarg);
+          break;
+
+        case 'i':
+          /* In-place editing implies -s.  The stored extension always
+             contains a `*': alone when no suffix was given, verbatim when
+             the argument already has one, otherwise prepended to the
+             suffix.  */
+          separate_files = true;
+          if (optarg == NULL)
+            /* use no backups */
+            in_place_extension = ck_strdup ("*");
+
+          else if (strchr(optarg, '*') != NULL)
+            in_place_extension = ck_strdup(optarg);
+
+          else
+            {
+              in_place_extension = MALLOC (strlen(optarg) + 2, char);
+              in_place_extension[0] = '*';
+              strcpy (in_place_extension + 1, optarg);
+            }
+
+          break;
+
+        case 'l':
+          lcmd_out_line_len = ATOI(optarg);
+          break;
+
+        case 'p':
+          posixicity = POSIXLY_BASIC;
+          break;
+
+        case 'r':
+          /* A regex dialect may be selected only once.  */
+          if (extended_regexp_flags)
+            usage(4);
+          extended_regexp_flags = REG_EXTENDED;
+          break;
+
+#ifdef REG_PERL
+        case 'R':
+          if (extended_regexp_flags)
+            usage(4);
+          extended_regexp_flags = REG_PERL;
+          break;
+#endif
+
+        case 's':
+          separate_files = true;
+          break;
+
+        case 'u':
+          unbuffered_output = true;
+          break;
+
+        case 'v':
+#ifdef REG_PERL
+          fprintf(stdout, _("super-sed version %s\n"), VERSION);
+          fprintf(stdout, _("based on GNU sed version %s\n\n"), SED_FEATURE_VERSION);
+#else
+          fprintf(stdout, _("GNU sed version %s\n"), VERSION);
+#endif
+          fprintf(stdout, _("%s\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n\
+to the extent permitted by law.\n\
+"), COPYRIGHT_NOTICE);
+          ck_fclose (NULL);
+          exit (0);
+        case 'h':
+          /* usage() exits, so the missing `break' is harmless.  */
+          usage(0);
+        default:
+          usage(4);
+        }
+    }
+
+  /* Without -e/-f the first non-option argument is the script.  */
+  if (!the_program)
+    {
+      if (optind < argc)
+        {
+          char *arg = argv[optind++];
+          the_program = compile_string(the_program, arg, strlen(arg));
+        }
+      else
+        usage(4);
+    }
+  check_final_program(the_program);
+
+  /* The remaining arguments are the input files.  */
+  return_code = process_files(the_program, argv+optind);
+
+  finish_program(the_program);
+  ck_fclose(NULL);
+
+  return return_code;
+}
diff --git a/sed/sed.h b/sed/sed.h
new file mode 100644
index 0000000..01969d9
--- /dev/null
+++ b/sed/sed.h
@@ -0,0 +1,254 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "basicdefs.h"
+#include "regex.h"
+
+#ifndef BOOTSTRAP
+#include <stdio.h>
+#endif
+
+#include "utils.h"
+
+/* Struct vector is used to describe a compiled sed program.  sed.c keeps
+   the program being run in a pointer to one of these, which is passed
+   through compile_string()/compile_file() and then to process_files(). */
+struct vector {
+ struct sed_cmd *v; /* a dynamically allocated array */
+ size_t v_allocated; /* ... number slots allocated */
+ size_t v_length; /* ... number of slots in use */
+};
+
+/* This structure tracks files used by sed so that they may all be
+   closed cleanly at normal program termination. A flag is kept that tells
+   if a missing newline was encountered, so that it is added on the
+   next line and the two lines are not concatenated. */
+struct output {
+ char *name; /* presumably the file's name -- confirm against users */
+ bool missing_newline; /* the flag described above */
+ FILE *fp; /* stdio stream of the file */
+ struct output *link; /* presumably the next tracked file -- confirm */
+};
+
+/* A counted block of text; struct sed_cmd uses it (x.cmd_txt) for the
+   a, i, and c commands. */
+struct text_buf {
+ char *text; /* the text itself */
+ size_t text_length; /* its length (presumably in bytes -- confirm) */
+};
+
+/* Case-conversion flags stored in struct replacement.repl_type.  The
+   values are bit flags: one of REPL_UPPERCASE/REPL_LOWERCASE may be
+   combined with one of the *_FIRST flags, and REPL_MODIFIERS is the mask
+   of the two *_FIRST bits.
+   NOTE(review): presumably these map to \U, \L, \u and \l in the
+   replacement text -- confirm in the compiler. */
+enum replacement_types {
+ REPL_ASIS = 0,
+ REPL_UPPERCASE = 1,
+ REPL_LOWERCASE = 2,
+ REPL_UPPERCASE_FIRST = 4,
+ REPL_LOWERCASE_FIRST = 8,
+ REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,
+
+ /* These are given to aid in debugging */
+ REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
+ REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
+ REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
+ REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
+};
+
+/* Kind of text handed to normalize_text() (its BUFTYPE argument);
+   compile_regex() passes TEXT_REGEX for regular expressions. */
+enum text_types {
+ TEXT_BUFFER,
+ TEXT_REPLACEMENT,
+ TEXT_REGEX
+};
+
+/* How strictly POSIX sed should behave (global `posixicity').  main()
+   picks POSIXLY_CORRECT when the POSIXLY_CORRECT environment variable is
+   set, POSIXLY_BASIC for --posix and POSIXLY_EXTENDED otherwise. */
+enum posixicity_types {
+ POSIXLY_EXTENDED, /* with GNU extensions */
+ POSIXLY_CORRECT, /* with POSIX-compatible GNU extensions */
+ POSIXLY_BASIC /* pedantically POSIX */
+};
+
+/* State of a command's address range; kept per command in
+   struct sed_cmd.range_state. */
+enum addr_state {
+ RANGE_INACTIVE, /* never been active */
+ RANGE_ACTIVE, /* between first and second address */
+ RANGE_CLOSED /* like RANGE_INACTIVE, but range has ended once */
+};
+
+/* Discriminator for struct addr: tells which of its fields are
+   meaningful for a given address. */
+enum addr_types {
+ ADDR_IS_NULL, /* null address */
+ ADDR_IS_REGEX, /* a.addr_regex is valid */
+ ADDR_IS_NUM, /* a.addr_number is valid */
+ ADDR_IS_NUM_MOD, /* a.addr_number is valid, addr_step is modulo */
+ ADDR_IS_STEP, /* address is +N (only valid for addr2) */
+ ADDR_IS_STEP_MOD, /* address is ~N (only valid for addr2) */
+ ADDR_IS_LAST /* address is $ */
+};
+
+/* One address of a command (struct sed_cmd.a1 / .a2).  addr_type says
+   which of the remaining fields are valid; see enum addr_types. */
+struct addr {
+ enum addr_types addr_type;
+ countT addr_number; /* valid for ADDR_IS_NUM and ADDR_IS_NUM_MOD */
+ countT addr_step; /* the modulo for ADDR_IS_NUM_MOD */
+ regex_t *addr_regex; /* valid for ADDR_IS_REGEX */
+};
+
+
+/* One piece of the replacement of an `s' command; the pieces form a
+   singly linked list hanging off struct subst.replacement.
+   NOTE(review): presumably each piece is literal text (prefix) followed
+   by the subexpression numbered subst_id -- confirm in the compiler. */
+struct replacement {
+ char *prefix;
+ size_t prefix_length; /* length of prefix */
+ int subst_id;
+ enum replacement_types repl_type; /* case conversion to apply */
+ struct replacement *next; /* next piece in the list */
+};
+
+/* Data of one `s' command (struct sed_cmd.x.cmd_subst): the regex, the
+   replacement pieces and the flags that followed the command. */
+struct subst {
+ regex_t *regx;
+ struct replacement *replacement;
+ countT numb; /* if >0, only substitute for match number "numb" */
+ struct output *outf; /* 'w' option given */
+ unsigned global : 1; /* 'g' option given */
+ unsigned print : 2; /* 'p' option given (before/after eval) */
+ unsigned eval : 1; /* 'e' option given */
+ unsigned max_id : 4; /* maximum backreference on the RHS */
+};
+
+#ifdef REG_PERL
+/* This is the structure we store register match data in. See
+   regex.texinfo for a full description of what registers match.
+   It is declared here only when REG_PERL is defined; in that
+   configuration copy_regs() in regexp.c fills it from regmatch_t data. */
+struct re_registers
+{
+ unsigned num_regs; /* number of slots in start[] and end[] */
+ regoff_t *start;
+ regoff_t *end;
+};
+#endif
+
+
+
+/* One compiled command of the program; struct vector.v is an array of
+   these.  The union `x' holds the argument, whose member in use depends
+   on the command character `cmd'. */
+struct sed_cmd {
+ struct addr *a1; /* save space: usually is NULL */
+ struct addr *a2;
+
+ /* See description the enum, above. */
+ enum addr_state range_state;
+
+ /* Non-zero if command is to be applied to non-matches. */
+ char addr_bang;
+
+ /* The actual command character. */
+ char cmd;
+
+ /* auxiliary data for various commands */
+ union {
+ /* This structure is used for a, i, and c commands. */
+ struct text_buf cmd_txt;
+
+ /* This is used for the l, q and Q commands. */
+ int int_arg;
+
+ /* This is used for the {}, b, and t commands. */
+ countT jump_index;
+
+ /* This is used for the r command. */
+ char *fname;
+
+ /* This is used for the hairy s command. */
+ struct subst *cmd_subst;
+
+ /* This is used for the w command. */
+ struct output *outf;
+
+ /* This is used for the R command. */
+ FILE *fp;
+
+ /* This is used for the y command. */
+ unsigned char *translate;
+ /* NOTE(review): presumably the multibyte counterpart of `translate'
+    for the y command -- confirm. */
+ char **translatemb;
+ } x;
+};
+
+
+
+/* Script compilation interface, used by main() in sed.c and by regexp.c
+   (bad_prog reports an error in the script). */
+void bad_prog P_((const char *why));
+size_t normalize_text P_((char *text, size_t len, enum text_types buftype));
+struct vector *compile_string P_((struct vector *, char *str, size_t len));
+struct vector *compile_file P_((struct vector *, const char *cmdfile));
+void check_final_program P_((struct vector *));
+void rewind_read_files P_((void));
+void finish_program P_((struct vector *));
+
+/* Regular expression wrappers, defined in regexp.c. */
+regex_t *compile_regex P_((struct buffer *b, int flags, int needed_sub));
+int match_regex P_((regex_t *regex,
+ char *buf, size_t buflen, size_t buf_start_offset,
+ struct re_registers *regarray, int regsize));
+#ifdef DEBUG_LEAKS
+void release_regex P_((regex_t *));
+#endif
+
+/* Runs the compiled program over the files named in ARGV; main() returns
+   its result as the exit status. */
+int process_files P_((struct vector *, char **argv));
+
+int main P_((int, char **));
+
+extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file));
+
+/* Regex dialect chosen on the command line (0, REG_EXTENDED or, when
+   available, REG_PERL); defined in sed.c. */
+extern int extended_regexp_flags;
+
+/* If set, fflush(stdout) on every line output. */
+extern bool unbuffered_output;
+
+/* If set, don't write out the line unless explicitly told to. */
+extern bool no_default_output;
+
+/* If set, reset line counts on every new file. */
+extern bool separate_files;
+
+/* Do we need to be pedantically POSIX compliant? */
+extern enum posixicity_types posixicity;
+
+/* How long should the `l' command's output line be? */
+extern countT lcmd_out_line_len;
+
+/* How do we edit files in-place? (we don't if NULL) */
+extern char *in_place_extension;
+
+/* Should we use EREs?
+   NOTE(review): sed.c defines extended_regexp_flags but no variable of
+   this name; this declaration may be stale -- confirm before relying on it. */
+extern bool use_extended_syntax_p;
+
+/* Declarations for multibyte character sets. */
+extern int mb_cur_max;
+
+/* Multibyte helpers.  When mb_cur_max is 1 they short-circuit to the
+   single-byte answer without calling into the C library's multibyte
+   conversion; without HAVE_MBRTOWC every character is one byte long.
+
+   MBRTOWC (pwc, s, n, ps)  like mbrtowc(); uses btowc() when mb_cur_max is 1
+   MBRLEN (s, n, ps)        length of the character at S
+   BRLEN (ch, ps)           feed one byte to the state PS, see brlen()  */
+#ifdef HAVE_MBRTOWC
+#ifdef HAVE_BTOWC
+#define MBRTOWC(pwc, s, n, ps) \
+  (mb_cur_max == 1 ? \
+   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
+   mbrtowc ((pwc), (s), (n), (ps)))
+#else
+#define MBRTOWC(pwc, s, n, ps) \
+  mbrtowc ((pwc), (s), (n), (ps))
+#endif
+
+/* Arguments are parenthesized so that expressions can be passed safely.  */
+#define MBRLEN(s, n, ps) \
+  (mb_cur_max == 1 ? 1 : mbrtowc (NULL, (s), (n), (ps)))
+
+#define BRLEN(ch, ps) \
+  (mb_cur_max == 1 ? 1 : brlen ((ch), (ps)))
+
+/* brlen() is only defined (mbcs.c) when HAVE_MBRTOWC is set and its
+   prototype needs mbstate_t, so it is declared under the same guard.  */
+extern int brlen P_ ((int ch, mbstate_t *ps));
+
+#else
+#define MBRLEN(s, n, ps) 1
+#define BRLEN(ch, ps) 1
+#endif
+
+extern void initialize_mbcs P_ ((void));
+
diff --git a/testsuite/0range.good b/testsuite/0range.good
new file mode 100644
index 0000000..7cfab5b
--- /dev/null
+++ b/testsuite/0range.good
@@ -0,0 +1 @@
+yes
diff --git a/testsuite/0range.inp b/testsuite/0range.inp
new file mode 100644
index 0000000..c09c47b
--- /dev/null
+++ b/testsuite/0range.inp
@@ -0,0 +1,6 @@
+1
+2
+3
+4
+aaa
+yes
diff --git a/testsuite/0range.sed b/testsuite/0range.sed
new file mode 100644
index 0000000..33aa8b8
--- /dev/null
+++ b/testsuite/0range.sed
@@ -0,0 +1 @@
+0,/aaa/d
diff --git a/testsuite/8bit.good b/testsuite/8bit.good
new file mode 100644
index 0000000..1bd5178
--- /dev/null
+++ b/testsuite/8bit.good
@@ -0,0 +1,9 @@
+äƤâ¤è ¤ßäÆ»ý¤Á
+·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á
+¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù
+²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í
+¤½¤é¤ß¤Ä ÆüËܤιñ¤Ï
+¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì
+¤·¤­¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤»
+¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á
+²È¤ò¤â̾¤ò¤â
diff --git a/testsuite/8bit.inp b/testsuite/8bit.inp
new file mode 100644
index 0000000..8c9c4bb
--- /dev/null
+++ b/testsuite/8bit.inp
@@ -0,0 +1,9 @@
+äƤâ¤è ¤ßäÆ»ý¤Á
+·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á
+¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù
+²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í
+¤½¤é¤ß¤Ä ÂçϤιñ¤Ï
+¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì
+¤·¤­¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤»
+¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á
+²È¤ò¤â̾¤ò¤â
diff --git a/testsuite/8bit.sed b/testsuite/8bit.sed
new file mode 100644
index 0000000..7b3ed8d
--- /dev/null
+++ b/testsuite/8bit.sed
@@ -0,0 +1,21 @@
+# The first poem from the Man'yoshu. I like Hitomaro's poems better
+# but I couldn't find a copy of any of them in Japanese. This version
+# of this poem is from $BNc2r8E8l<-E5(B($BBh;0HG(B)$B;0>JF2(B.
+#
+# Speaking of Hitomaro, here is the English translation of one of my
+# favorites. I just know that everyone reading these test cases wants
+# to see this.
+#
+# In the autumn mountains
+# The yellow leaves are so thick.
+# Alas, how shall I seek my love
+# Who has wandered away?
+#
+# I see the messenger come
+# As the yellow leaves are falling.
+# Oh, well I remember
+# How on such a day we used to meet--
+# My lover and I!
+# -- Kakinomoto Hitomaro
+#
+s/ÂçÏÂ/ÆüËÜ/
diff --git a/testsuite/8to7.good b/testsuite/8to7.good
new file mode 100644
index 0000000..4485882
--- /dev/null
+++ b/testsuite/8to7.good
@@ -0,0 +1,14 @@
+\344\306\244\342\244\350 \244\337\344\306\273\375\244\301$
+\267\241\266\372\244\342\244\350 \244\337\267\241\266\372\273\375\244\
+\301$
+\244\263\244\316\265\326\244\313 \272\332\305\246\244\336\244\271\273\
+\371$
+\262\310\264\326\244\253\244\312 \271\360\244\351\244\265\244\315$
+\244\275\244\351\244\337\244\304 \302\347\317\302\244\316\271\361\244\
+\317$
+\244\252\244\267\244\343\244\312\244\331\244\306 \244\357\244\354\244\
+\263\244\275\265\357\244\354$
+\244\267\244\255\244\312\244\331\244\306 \244\357\244\354\244\263\244\
+\275 \272\302\244\273$
+\244\357\244\313\244\263\244\275\244\317 \271\360\244\351\244\341$
+\262\310\244\362\244\342\314\276\244\362\244\342$
diff --git a/testsuite/8to7.inp b/testsuite/8to7.inp
new file mode 100644
index 0000000..8c9c4bb
--- /dev/null
+++ b/testsuite/8to7.inp
@@ -0,0 +1,9 @@
+äƤâ¤è ¤ßäÆ»ý¤Á
+·¡¶ú¤â¤è ¤ß·¡¶ú»ý¤Á
+¤³¤ÎµÖ¤Ë ºÚŦ¤Þ¤¹»ù
+²È´Ö¤«¤Ê ¹ð¤é¤µ¤Í
+¤½¤é¤ß¤Ä ÂçϤιñ¤Ï
+¤ª¤·¤ã¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½µï¤ì
+¤·¤­¤Ê¤Ù¤Æ ¤ï¤ì¤³¤½ ºÂ¤»
+¤ï¤Ë¤³¤½¤Ï ¹ð¤é¤á
+²È¤ò¤â̾¤ò¤â
diff --git a/testsuite/8to7.sed b/testsuite/8to7.sed
new file mode 100644
index 0000000..f9d3f50
--- /dev/null
+++ b/testsuite/8to7.sed
@@ -0,0 +1 @@
+l;d
diff --git a/testsuite/BOOST.tests b/testsuite/BOOST.tests
new file mode 100644
index 0000000..98fd3b6
--- /dev/null
+++ b/testsuite/BOOST.tests
@@ -0,0 +1,829 @@
+;
+;
+; this file contains a script of tests to run through regress.exe
+;
+; comments start with a semicolon and proceed to the end of the line
+;
+; changes to regular expression compile flags start with a "-" as the first
+; non-whitespace character and consist of a list of the printable names
+; of the flags, for example "match_default"
+;
+; Other lines contain a test to perform using the current flag status
+; the first token contains the expression to compile, the second the string
+; to match it against. If the second string is "!" then the expression should
+; not compile, that is the first string is an invalid regular expression.
+; This is then followed by a list of integers that specify what should match,
+; each pair represents the starting and ending positions of a subexpression
+; starting with the zeroth subexpression (the whole match).
+; A value of -1 indicates that the subexpression should not take part in the
+; match at all, if the first value is -1 then no part of the expression should
+; match the string.
+;
+; Tests taken from BOOST testsuite and adapted to glibc regex.
+;
+; Boost Software License - Version 1.0 - August 17th, 2003
+;
+; Permission is hereby granted, free of charge, to any person or organization
+; obtaining a copy of the software and accompanying documentation covered by
+; this license (the "Software") to use, reproduce, display, distribute,
+; execute, and transmit the Software, and to prepare derivative works of the
+; Software, and to permit third-parties to whom the Software is furnished to
+; do so, all subject to the following:
+;
+; The copyright notices in the Software and this entire statement, including
+; the above license grant, this restriction and the following disclaimer,
+; must be included in all copies of the Software, in whole or in part, and
+; all derivative works of the Software, unless such copies or derivative
+; works are solely in the form of machine-executable object code generated by
+; a source language processor.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+; DEALINGS IN THE SOFTWARE.
+;
+
+- match_default normal REG_EXTENDED
+
+;
+; try some really simple literals:
+a a 0 1
+Z Z 0 1
+Z aaa -1 -1
+Z xxxxZZxxx 4 5
+
+; and some simple brackets:
+(a) zzzaazz 3 4 3 4
+() zzz 0 0 0 0
+() "" 0 0 0 0
+( !
+) ) 0 1
+(aa !
+aa) baa)b 1 4
+a b -1 -1
+\(\) () 0 2
+\(a\) (a) 0 3
+\() () 0 2
+(\) !
+p(a)rameter ABCparameterXYZ 3 12 4 5
+[pq](a)rameter ABCparameterXYZ 3 12 4 5
+
+; now try escaped brackets:
+- match_default bk_parens REG_BASIC
+\(a\) zzzaazz 3 4 3 4
+\(\) zzz 0 0 0 0
+\(\) "" 0 0 0 0
+\( !
+\) !
+\(aa !
+aa\) !
+() () 0 2
+(a) (a) 0 3
+(\) !
+\() !
+
+; now move on to "." wildcards
+- match_default normal REG_EXTENDED REG_STARTEND
+. a 0 1
+. \n 0 1
+. \r 0 1
+. \0 0 1
+
+;
+; now move on to the repetition ops,
+; starting with operator *
+- match_default normal REG_EXTENDED
+a* b 0 0
+ab* a 0 1
+ab* ab 0 2
+ab* sssabbbbbbsss 3 10
+ab*c* a 0 1
+ab*c* abbb 0 4
+ab*c* accc 0 4
+ab*c* abbcc 0 5
+*a !
+\<* !
+\>* !
+\n* \n\n 0 2
+\** ** 0 2
+\* * 0 1
+
+; now try operator +
+ab+ a -1 -1
+ab+ ab 0 2
+ab+ sssabbbbbbsss 3 10
+ab+c+ a -1 -1
+ab+c+ abbb -1 -1
+ab+c+ accc -1 -1
+ab+c+ abbcc 0 5
++a !
+\<+ !
+\>+ !
+\n+ \n\n 0 2
+\+ + 0 1
+\+ ++ 0 1
+\++ ++ 0 2
+
+; now try operator ?
+- match_default normal REG_EXTENDED
+a? b 0 0
+ab? a 0 1
+ab? ab 0 2
+ab? sssabbbbbbsss 3 5
+ab?c? a 0 1
+ab?c? abbb 0 2
+ab?c? accc 0 2
+ab?c? abcc 0 3
+?a !
+\<? !
+\>? !
+\n? \n\n 0 1
+\? ? 0 1
+\? ?? 0 1
+\?? ?? 0 1
+
+; now try operator {}
+- match_default normal REG_EXTENDED
+a{2} a -1 -1
+a{2} aa 0 2
+a{2} aaa 0 2
+a{2,} a -1 -1
+a{2,} aa 0 2
+a{2,} aaaaa 0 5
+a{2,4} a -1 -1
+a{2,4} aa 0 2
+a{2,4} aaa 0 3
+a{2,4} aaaa 0 4
+a{2,4} aaaaa 0 4
+a{} !
+a{2 !
+a} a} 0 2
+\{\} {} 0 2
+
+- match_default normal REG_BASIC
+a\{2\} a -1 -1
+a\{2\} aa 0 2
+a\{2\} aaa 0 2
+a\{2,\} a -1 -1
+a\{2,\} aa 0 2
+a\{2,\} aaaaa 0 5
+a\{2,4\} a -1 -1
+a\{2,4\} aa 0 2
+a\{2,4\} aaa 0 3
+a\{2,4\} aaaa 0 4
+a\{2,4\} aaaaa 0 4
+{} {} 0 2
+
+; now test the alternation operator |
+- match_default normal REG_EXTENDED
+a|b a 0 1
+a|b b 0 1
+a(b|c) ab 0 2 1 2
+a(b|c) ac 0 2 1 2
+a(b|c) ad -1 -1 -1 -1
+a\| a| 0 2
+
+; now test the set operator []
+- match_default normal REG_EXTENDED
+; try some literals first
+[abc] a 0 1
+[abc] b 0 1
+[abc] c 0 1
+[abc] d -1 -1
+[^bcd] a 0 1
+[^bcd] b -1 -1
+[^bcd] d -1 -1
+[^bcd] e 0 1
+a[b]c abc 0 3
+a[ab]c abc 0 3
+a[^ab]c adc 0 3
+a[]b]c a]c 0 3
+a[[b]c a[c 0 3
+a[-b]c a-c 0 3
+a[^]b]c adc 0 3
+a[^-b]c adc 0 3
+a[b-]c a-c 0 3
+a[b !
+a[] !
+
+; then some ranges
+[b-e] a -1 -1
+[b-e] b 0 1
+[b-e] e 0 1
+[b-e] f -1 -1
+[^b-e] a 0 1
+[^b-e] b -1 -1
+[^b-e] e -1 -1
+[^b-e] f 0 1
+a[1-3]c a2c 0 3
+a[3-1]c !
+a[1-3-5]c !
+a[1- !
+
+; and some classes
+a[[:alpha:]]c abc 0 3
+a[[:unknown:]]c !
+a[[: !
+a[[:alpha !
+a[[:alpha:] !
+a[[:alpha,:] !
+a[[:]:]]b !
+a[[:-:]]b !
+a[[:alph:]] !
+a[[:alphabet:]] !
+[[:alnum:]]+ -%@a0X_- 3 6
+[[:alpha:]]+ -%@aX_0- 3 5
+[[:blank:]]+ "a \tb" 1 4
+[[:cntrl:]]+ a\n\tb 1 3
+[[:digit:]]+ a019b 1 4
+[[:graph:]]+ " a%b " 1 4
+[[:lower:]]+ AabC 1 3
+; This test fails with STLPort, disable for now as this is a corner case anyway...
+;[[:print:]]+ "\na b\n" 1 4
+[[:punct:]]+ " %-&\t" 1 4
+[[:space:]]+ "a \n\t\rb" 1 5
+[[:upper:]]+ aBCd 1 3
+[[:xdigit:]]+ p0f3Cx 1 5
+
+; now test flag settings:
+- escape_in_lists REG_NO_POSIX_TEST
+[\n] \n 0 1
+- REG_NO_POSIX_TEST
+
+; line anchors
+- match_default normal REG_EXTENDED
+^ab ab 0 2
+^ab xxabxx -1 -1
+ab$ ab 0 2
+ab$ abxx -1 -1
+- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
+^ab ab -1 -1
+^ab xxabxx -1 -1
+ab$ ab -1 -1
+ab$ abxx -1 -1
+
+; back references
+- match_default normal REG_PERL
+a(b)\2c !
+a(b\1)c !
+a(b*)c\1d abbcbbd 0 7 1 3
+a(b*)c\1d abbcbd -1 -1
+a(b*)c\1d abbcbbbd -1 -1
+^(.)\1 abc -1 -1
+a([bc])\1d abcdabbd 4 8 5 6
+; strictly speaking this is at best ambiguous, at worst wrong; this is what most
+; regex implementations will match, though.
+a(([bc])\2)*d abbccd 0 6 3 5 3 4
+
+a(([bc])\2)*d abbcbd -1 -1
+a((b)*\2)*d abbbd 0 5 1 4 2 3
+; perl only:
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+(a)\1bcd aabcd 0 5 0 1
+(a)\1bc*d aabcd 0 5 0 1
+(a)\1bc*d aabd 0 4 0 1
+(a)\1bc*d aabcccd 0 7 0 1
+(a)\1bc*[ce]d aabcccd 0 7 0 1
+^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
+
+; posix only:
+- match_default extended REG_EXTENDED
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+
+;
+; word operators:
+\w a 0 1
+\w z 0 1
+\w A 0 1
+\w Z 0 1
+\w _ 0 1
+\w } -1 -1
+\w ` -1 -1
+\w [ -1 -1
+\w @ -1 -1
+; non-word:
+\W a -1 -1
+\W z -1 -1
+\W A -1 -1
+\W Z -1 -1
+\W _ -1 -1
+\W } 0 1
+\W ` 0 1
+\W [ 0 1
+\W @ 0 1
+; word start:
+\<abcd " abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+; word boundary:
+\babcd " abcd" 2 6
+\bab cab -1 -1
+\bab "\nab" 1 3
+\btag ::tag 2 5
+abc\b abc 0 3
+abc\b abcd -1 -1
+abc\b abc\n 0 3
+abc\b abc:: 0 3
+; within word:
+\B ab 1 1
+a\Bb ab 0 2
+a\B ab 0 1
+a\B a -1 -1
+a\B "a " -1 -1
+
+;
+; buffer operators:
+\`abc abc 0 3
+\`abc \nabc -1 -1
+\`abc " abc" -1 -1
+abc\' abc 0 3
+abc\' abc\n -1 -1
+abc\' "abc " -1 -1
+
+;
+; now follow various complex expressions designed to try to break the matcher:
+a(((b)))c abc 0 3 1 2 1 2 1 2
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c)d abbd 0 4 1 3
+; just gotta have one DFA-buster, of course
+a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
+; and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
+; and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
+; one really big one
+1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
+; fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
+[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
+[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
+[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
+; and as parentheses go past 9:
+(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
+(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
+(a)d|(b)c abc 1 3 -1 -1 1 2
+_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
+
+; subtleties of matching
+;a(b)?c\1d acd 0 3 -1 -1
+; POSIX is about the following test:
+a(b)?c\1d acd -1 -1 -1 -1
+a(b?c)+d accd 0 4 2 3
+(wee|week)(knights|night) weeknights 0 10 0 3 3 10
+.* abc 0 3
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c|e)d abbd 0 4 1 3
+a(b*|c|e)d acd 0 3 1 2
+a(b*|c|e)d ad 0 2 1 1
+a(b?)c abc 0 3 1 2
+a(b?)c ac 0 2 1 1
+a(b+)c abc 0 3 1 2
+a(b+)c abbbc 0 5 1 4
+a(b*)c ac 0 2 1 1
+(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
+a([bc]?)c abc 0 3 1 2
+a([bc]?)c ac 0 2 1 1
+a([bc]+)c abc 0 3 1 2
+a([bc]+)c abcc 0 4 1 3
+a([bc]+)bc abcbc 0 5 1 3
+a(bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abbb 0 4 1 3
+a(bbb+|bb+|b)bb abbb 0 4 1 2
+(.*).* abcdef 0 6 0 6
+(a*)* bc 0 0 0 0
+xyx*xz xyxxxxyxxxz 5 11
+
+; do we get the right subexpression when it is used more than once?
+a(b|c)*d ad 0 2 -1 -1
+a(b|c)*d abcd 0 4 2 3
+a(b|c)+d abd 0 3 1 2
+a(b|c)+d abcd 0 4 2 3
+a(b|c?)+d ad 0 2 1 1
+a(b|c){0,0}d ad 0 2 -1 -1
+a(b|c){0,1}d ad 0 2 -1 -1
+a(b|c){0,1}d abd 0 3 1 2
+a(b|c){0,2}d ad 0 2 -1 -1
+a(b|c){0,2}d abcd 0 4 2 3
+a(b|c){0,}d ad 0 2 -1 -1
+a(b|c){0,}d abcd 0 4 2 3
+a(b|c){1,1}d abd 0 3 1 2
+a(b|c){1,2}d abd 0 3 1 2
+a(b|c){1,2}d abcd 0 4 2 3
+a(b|c){1,}d abd 0 3 1 2
+a(b|c){1,}d abcd 0 4 2 3
+a(b|c){2,2}d acbd 0 4 2 3
+a(b|c){2,2}d abcd 0 4 2 3
+a(b|c){2,4}d abcd 0 4 2 3
+a(b|c){2,4}d abcbd 0 5 3 4
+a(b|c){2,4}d abcbcd 0 6 4 5
+a(b|c){2,}d abcd 0 4 2 3
+a(b|c){2,}d abcbd 0 5 3 4
+; perl only: these conflict with the POSIX test below
+;a(b|c?)+d abcd 0 4 3 3
+;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
+;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
+
+; posix only:
+- match_default extended REG_EXTENDED REG_STARTEND
+
+a(b|c?)+d abcd 0 4 2 3
+a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
+a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
+a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
+a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
+a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
+a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
+a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
+a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
+
+- match_default normal REG_PERL
+; try to match C++ syntax elements:
+; line comment:
+//[^\n]* "++i //here is a line comment\n" 4 28
+; block comment:
+/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
+/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
+; preprocessor directives:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
+; perl only:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
+; literals:
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
+; strings:
+'([^\\']|\\.)*' '\\x3A' 0 6 4 5
+'([^\\']|\\.)*' '\\'' 0 4 1 3
+'([^\\']|\\.)*' '\\n' 0 4 1 3
+
+; finally try some case insensitive matches:
+- match_default normal REG_EXTENDED REG_ICASE
+; upper and lower have no meaning here so they fail, however these
+; may compile with other libraries...
+;[[:lower:]] !
+;[[:upper:]] !
+0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
+
+; known and suspected bugs:
+- match_default normal REG_EXTENDED
+\( ( 0 1
+\) ) 0 1
+\$ $ 0 1
+\^ ^ 0 1
+\. . 0 1
+\* * 0 1
+\+ + 0 1
+\? ? 0 1
+\[ [ 0 1
+\] ] 0 1
+\| | 0 1
+\\ \\ 0 1
+# # 0 1
+\# # 0 1
+a- a- 0 2
+\- - 0 1
+\{ { 0 1
+\} } 0 1
+0 0 0 1
+1 1 0 1
+9 9 0 1
+b b 0 1
+B B 0 1
+< < 0 1
+> > 0 1
+w w 0 1
+W W 0 1
+` ` 0 1
+' ' 0 1
+\n \n 0 1
+, , 0 1
+a a 0 1
+f f 0 1
+n n 0 1
+r r 0 1
+t t 0 1
+v v 0 1
+c c 0 1
+x x 0 1
+: : 0 1
+(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
+
+- match_default normal REG_EXTENDED REG_ICASE
+a A 0 1
+A a 0 1
+[abc]+ abcABC 0 6
+[ABC]+ abcABC 0 6
+[a-z]+ abcABC 0 6
+[A-Z]+ abzANZ 0 6
+[a-Z]+ abzABZ 0 6
+[A-z]+ abzABZ 0 6
+[[:lower:]]+ abyzABYZ 0 8
+[[:upper:]]+ abzABZ 0 6
+[[:alpha:]]+ abyzABYZ 0 8
+[[:alnum:]]+ 09abyzABYZ 0 10
+
+; word start:
+\<abcd " abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+
+; collating elements and rewritten set code:
+- match_default normal REG_EXTENDED REG_STARTEND
+;[[.zero.]] 0 0 1
+;[[.one.]] 1 0 1
+;[[.two.]] 2 0 1
+;[[.three.]] 3 0 1
+[[.a.]] baa 1 2
+;[[.right-curly-bracket.]] } 0 1
+;[[.NUL.]] \0 0 1
+[[:<:]z] !
+[a[:>:]] !
+[[=a=]] a 0 1
+;[[=right-curly-bracket=]] } 0 1
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+[[.A.]] A 0 1
+[[.A.]] a 0 1
+[[.A.]-b]+ AaBb 0 4
+[A-[.b.]]+ AaBb 0 4
+[[.a.]-B]+ AaBb 0 4
+[a-[.B.]]+ AaBb 0 4
+- match_default normal REG_EXTENDED REG_STARTEND
+[[.a.]-c]+ abcd 0 3
+[a-[.c.]]+ abcd 0 3
+[[:alpha:]-a] !
+[a-[:alpha:]] !
+
+; try multi-character ligatures:
+;[[.ae.]] ae 0 2
+;[[.ae.]] aE -1 -1
+;[[.AE.]] AE 0 2
+;[[.Ae.]] Ae 0 2
+;[[.ae.]-b] a -1 -1
+;[[.ae.]-b] b 0 1
+;[[.ae.]-b] ae 0 2
+;[a-[.ae.]] a 0 1
+;[a-[.ae.]] b -1 -1
+;[a-[.ae.]] ae 0 2
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+;[[.ae.]] AE 0 2
+;[[.ae.]] Ae 0 2
+;[[.AE.]] Ae 0 2
+;[[.Ae.]] aE 0 2
+;[[.AE.]-B] a -1 -1
+;[[.Ae.]-b] b 0 1
+;[[.Ae.]-b] B 0 1
+;[[.ae.]-b] AE 0 2
+
+- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
+\s+ "ab ab" 2 5
+\S+ " abc " 2 5
+
+- match_default normal REG_EXTENDED REG_STARTEND
+\`abc abc 0 3
+\`abc aabc -1 -1
+abc\' abc 0 3
+abc\' abcd -1 -1
+abc\' abc\n\n -1 -1
+abc\' abc 0 3
+
+; extended repeat checking to exercise new algorithms:
+ab.*xy abxy_ 0 4
+ab.*xy ab_xy_ 0 5
+ab.*xy abxy 0 4
+ab.*xy ab_xy 0 5
+ab.* ab 0 2
+ab.* ab__ 0 4
+
+ab.{2,5}xy ab__xy_ 0 6
+ab.{2,5}xy ab____xy_ 0 8
+ab.{2,5}xy ab_____xy_ 0 9
+ab.{2,5}xy ab__xy 0 6
+ab.{2,5}xy ab_____xy 0 9
+ab.{2,5} ab__ 0 4
+ab.{2,5} ab_______ 0 7
+ab.{2,5}xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+ab.*?xy abxy_ 0 4
+ab.*?xy ab_xy_ 0 5
+ab.*?xy abxy 0 4
+ab.*?xy ab_xy 0 5
+ab.*? ab 0 2
+ab.*? ab__ 0 4
+
+ab.{2,5}?xy ab__xy_ 0 6
+ab.{2,5}?xy ab____xy_ 0 8
+ab.{2,5}?xy ab_____xy_ 0 9
+ab.{2,5}?xy ab__xy 0 6
+ab.{2,5}?xy ab_____xy 0 9
+ab.{2,5}? ab__ 0 4
+ab.{2,5}? ab_______ 0 7
+ab.{2,5}?xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+; again but with slower algorithm variant:
+- match_default REG_EXTENDED
+; now again for single character repeats:
+
+ab_*xy abxy_ 0 4
+ab_*xy ab_xy_ 0 5
+ab_*xy abxy 0 4
+ab_*xy ab_xy 0 5
+ab_* ab 0 2
+ab_* ab__ 0 4
+
+ab_{2,5}xy ab__xy_ 0 6
+ab_{2,5}xy ab____xy_ 0 8
+ab_{2,5}xy ab_____xy_ 0 9
+ab_{2,5}xy ab__xy 0 6
+ab_{2,5}xy ab_____xy 0 9
+ab_{2,5} ab__ 0 4
+ab_{2,5} ab_______ 0 7
+ab_{2,5}xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+ab_*?xy abxy_ 0 4
+ab_*?xy ab_xy_ 0 5
+ab_*?xy abxy 0 4
+ab_*?xy ab_xy 0 5
+ab_*? ab 0 2
+ab_*? ab__ 0 4
+
+ab_{2,5}?xy ab__xy_ 0 6
+ab_{2,5}?xy ab____xy_ 0 8
+ab_{2,5}?xy ab_____xy_ 0 9
+ab_{2,5}?xy ab__xy 0 6
+ab_{2,5}?xy ab_____xy 0 9
+ab_{2,5}? ab__ 0 4
+ab_{2,5}? ab_______ 0 7
+ab_{2,5}?xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+; and again for sets:
+ab[_,;]*xy abxy_ 0 4
+ab[_,;]*xy ab_xy_ 0 5
+ab[_,;]*xy abxy 0 4
+ab[_,;]*xy ab_xy 0 5
+ab[_,;]* ab 0 2
+ab[_,;]* ab__ 0 4
+
+ab[_,;]{2,5}xy ab__xy_ 0 6
+ab[_,;]{2,5}xy ab____xy_ 0 8
+ab[_,;]{2,5}xy ab_____xy_ 0 9
+ab[_,;]{2,5}xy ab__xy 0 6
+ab[_,;]{2,5}xy ab_____xy 0 9
+ab[_,;]{2,5} ab__ 0 4
+ab[_,;]{2,5} ab_______ 0 7
+ab[_,;]{2,5}xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+ab[_,;]*?xy abxy_ 0 4
+ab[_,;]*?xy ab_xy_ 0 5
+ab[_,;]*?xy abxy 0 4
+ab[_,;]*?xy ab_xy 0 5
+ab[_,;]*? ab 0 2
+ab[_,;]*? ab__ 0 4
+
+ab[_,;]{2,5}?xy ab__xy_ 0 6
+ab[_,;]{2,5}?xy ab____xy_ 0 8
+ab[_,;]{2,5}?xy ab_____xy_ 0 9
+ab[_,;]{2,5}?xy ab__xy 0 6
+ab[_,;]{2,5}?xy ab_____xy 0 9
+ab[_,;]{2,5}? ab__ 0 4
+ab[_,;]{2,5}? ab_______ 0 7
+ab[_,;]{2,5}?xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+; and again for tricky sets with digraphs:
+;ab[_[.ae.]]*xy abxy_ 0 4
+;ab[_[.ae.]]*xy ab_xy_ 0 5
+;ab[_[.ae.]]*xy abxy 0 4
+;ab[_[.ae.]]*xy ab_xy 0 5
+;ab[_[.ae.]]* ab 0 2
+;ab[_[.ae.]]* ab__ 0 4
+
+;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5} ab__ 0 4
+;ab[_[.ae.]]{2,5} ab_______ 0 7
+;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+;ab[_[.ae.]]*?xy abxy_ 0 4
+;ab[_[.ae.]]*?xy ab_xy_ 0 5
+;ab[_[.ae.]]*?xy abxy 0 4
+;ab[_[.ae.]]*?xy ab_xy 0 5
+;ab[_[.ae.]]*? ab 0 2
+;ab[_[.ae.]]*? ab__ 0 2
+
+;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5}? ab__ 0 4
+;ab[_[.ae.]]{2,5}? ab_______ 0 4
+;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+; new bugs detected in spring 2003:
+- normal match_continuous REG_NO_POSIX_TEST
+b abc 1 2
+
+() abc 0 0 0 0
+^() abc 0 0 0 0
+^()+ abc 0 0 0 0
+^(){1} abc 0 0 0 0
+^(){2} abc 0 0 0 0
+^((){2}) abc 0 0 0 0 0 0
+() "" 0 0 0 0
+()\1 "" 0 0 0 0
+()\1 a 0 0 0 0
+a()\1b ab 0 2 1 1
+a()b\1 ab 0 2 1 1
+
+; subtleties of matching with no sub-expressions marked
+- normal match_nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4
+(wee|week)(knights|night) weeknights 0 10
+.* abc 0 3
+a(b|(c))d abd 0 3
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
+
+- normal nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4
+(wee|week)(knights|night) weeknights 0 10
+.* abc 0 3
+a(b|(c))d abd 0 3
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
+
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
new file mode 100644
index 0000000..236714b
--- /dev/null
+++ b/testsuite/Makefile.am
@@ -0,0 +1,89 @@
+# Scratch files the tests may leave behind: redirected output (*.out, *.1out,
+# *.wout, ...), *.log files, core dumps and temporary files.
+CLEANFILES = tmp* core *.core $(EXTRA_PROGRAMS) *.*out *.log
+
+# `make check' runs the regex test programs (when enabled below) and the
+# sed script tests. SEDTESTS starts out empty and is extended with +=.
+TESTS = $(check_PROGRAMS) $(SEDTESTS)
+SEDTESTS =
+
+# The test programs link against libsed.a and look for headers in ../lib.
+LDADD = ../lib/libsed.a
+noinst_HEADERS = testcases.h ptestcases.h
+AM_CPPFLAGS = -I../lib
+
+# The regex test programs and the `space' test are only part of the test
+# suite when the TEST_REGEX automake conditional (defined outside this
+# file) is true.
+if TEST_REGEX
+check_PROGRAMS = bug-regex7 \
+ bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
+ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
+ bug-regex21 tst-pcre tst-boost runtests runptests tst-rxspencer
+
+SEDTESTS += space
+endif
+
+# sed script tests that are always run. Every name listed here has a
+# matching target in Makefile.tests.
+SEDTESTS += \
+ enable sep inclib 8bit newjis xabcx dollar noeol noeolw \
+ numsub numsub2 numsub3 numsub4 numsub5 0range bkslashes \
+ head madding mac-mf empty xbxcx xbxcx3 recall xemacs \
+ fasts uniq manis khadafy linecnt eval distrib 8to7 \
+ allsub cv-vars classes middle bsd stdin flipcase \
+ insens subwrite writeout readin \
+ help version file quiet \
+ factor binary3 binary2 binary dc
+
+# Each entry of TESTS is run through the runtest script, with MAKE and
+# VERSION exported in its environment.
+# NOTE(review): runtest is not shown here; presumably it dispatches the test
+# name to Makefile.tests -- confirm against the script.
+TESTS_ENVIRONMENT = MAKE="$(MAKE)" VERSION="$(VERSION)" $(srcdir)/runtest
+
+# Test-suite files shipped in the distribution: the regex test data, the
+# runtest driver with its makefile, and the script, input and expected
+# output files of the sed tests.
+EXTRA_DIST = \
+ PCRE.tests BOOST.tests SPENCER.tests \
+ runtest Makefile.tests \
+ 0range.good 0range.inp 0range.sed \
+ 8bit.good 8bit.inp 8bit.sed \
+ 8to7.good 8to7.inp 8to7.sed \
+ allsub.good allsub.inp allsub.sed \
+ binary.good binary.inp binary.sed binary2.sed binary3.sed \
+ bkslashes.good bkslashes.inp bkslashes.sed \
+ bsd.good bsd.sh \
+ cv-vars.good cv-vars.inp cv-vars.sed \
+ classes.good classes.inp classes.sed \
+ dc.good dc.inp dc.sed \
+ distrib.good distrib.inp distrib.sed distrib.sh \
+ dollar.good dollar.inp dollar.sed \
+ empty.good empty.inp empty.sed \
+ enable.good enable.inp enable.sed \
+ eval.good eval.inp eval.sed \
+ factor.good factor.inp factor.sed \
+ fasts.good fasts.inp fasts.sed \
+ flipcase.good flipcase.inp flipcase.sed \
+ head.good head.inp head.sed \
+ inclib.good inclib.inp inclib.sed \
+ insens.good insens.inp insens.sed \
+ khadafy.good khadafy.inp khadafy.sed \
+ linecnt.good linecnt.inp linecnt.sed \
+ space.good space.inp space.sed \
+ mac-mf.good mac-mf.inp mac-mf.sed \
+ madding.good madding.inp madding.sed \
+ manis.good manis.inp manis.sed \
+ middle.good middle.sed middle.inp \
+ newjis.good newjis.inp newjis.sed \
+ noeol.good noeol.inp noeol.sed \
+ noeolw.good noeolw.1good noeolw.2good noeolw.sed \
+ numsub.good numsub.inp numsub.sed \
+ numsub2.good numsub2.inp numsub2.sed \
+ numsub3.good numsub3.inp numsub3.sed \
+ numsub4.good numsub4.inp numsub4.sed \
+ numsub5.good numsub5.inp numsub5.sed \
+ readin.good readin.in2 readin.inp readin.sed \
+ recall.good recall.inp recall.sed \
+ sep.good sep.inp sep.sed \
+ subwrite.inp subwrite.sed subwrt1.good subwrt2.good \
+ uniq.good uniq.inp uniq.sed \
+ version.gin \
+ writeout.inp writeout.sed wrtout1.good wrtout2.good \
+ xabcx.good xabcx.inp xabcx.sed \
+ xbxcx.good xbxcx.inp xbxcx.sed \
+ xbxcx3.good xbxcx3.inp xbxcx3.sed \
+ xemacs.good xemacs.inp xemacs.sed
+
+# Remove the input copies that Makefile.tests creates in the build directory.
+#
+# eval.in2 is always a generated file (Makefile.tests copies it from
+# eval.inp and it is not listed in EXTRA_DIST), so it is removed
+# unconditionally; previously it survived `make clean' in in-tree builds.
+#
+# readin.in2 is a distributed source file (see EXTRA_DIST). The build
+# directory only holds a copy of it when building outside the source tree,
+# so it must not be deleted when srcdir is `.'.
+clean-local:
+	rm -f eval.in2
+	test x$(srcdir) = x. || rm -f readin.in2
+
+# automake makes `check' depend on $(TESTS). The sed script tests in
+# $(SEDTESTS) are not files built by this makefile, so declare them as
+# phony targets with an empty rule so that make does not complain.
+
+.PHONY: $(SEDTESTS)
+$(SEDTESTS):
diff --git a/testsuite/Makefile.tests b/testsuite/Makefile.tests
new file mode 100644
index 0000000..88a3a01
--- /dev/null
+++ b/testsuite/Makefile.tests
@@ -0,0 +1,154 @@
+# Testsuite makefile for GNU sed
+
+SHELL = /bin/sh
+
+# These are only fallback values. They are usually overridden by runtest.
+srcdir = .
+SED = ../sed/sed
+# Prefix placed in front of most test commands: forces the C locale and,
+# when TIME is set, wraps the command in $(TIME).
+SEDENV = LC_ALL=C $(TIME)
+
+# Uncomment to run every $(SEDENV)-prefixed command under `time'.
+#TIME=time
+# Tools used by the recipes below to compare actual with expected output
+# and to delete the output files afterwards.
+CMP=cmp
+RM=rm -f
+# Plain script tests: run <test>.sed on <test>.inp and require the output
+# to be identical to <test>.good. The output file is only deleted by the
+# last recipe line, so it is left behind for inspection when cmp fails.
+enable sep inclib 8bit 8to7 newjis xabcx dollar noeol bkslashes \
+numsub head madding mac-mf empty xbxcx xbxcx3 recall xemacs \
+fasts uniq manis linecnt khadafy allsub flipcase space::
+ $(SEDENV) $(SED) -f $(srcdir)/$@.sed \
+ < $(srcdir)/$@.inp > $@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# Like the plain script tests, but sed is additionally given the -s option.
+0range::
+ $(SEDENV) $(SED) -s -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# This checks for a bug in 3.02 and 3.02.80.
+# Two sed processes (`sed d' followed by `sed G') are run one after the
+# other on the same standard input, once redirected from a file and once
+# fed through a pipe; both arrangements must produce identical output.
+stdin::
+ ($(SEDENV) $(SED) d; $(SEDENV) $(SED) G) < $(srcdir)/numsub.inp > $@.1out
+ $(SEDENV) cat $(srcdir)/numsub.inp | ($(SEDENV) $(SED) d; $(SEDENV) $(SED) G) > $@.2out
+ $(CMP) $@.1out $@.2out
+ @$(RM) $@.1out $@.2out
+
+# Script tests that are run with -n: <test>.sed is applied to <test>.inp
+# and the output must be identical to <test>.good.
+cv-vars classes middle dc distrib factor numsub2 numsub3 numsub4 numsub5 \
+insens::
+ $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# Runs noeolw.sed with -n on noeol.inp given twice as a file argument.
+# Besides standard output, two further files (noeolw.1out and noeolw.2out)
+# are compared against their expected contents.
+# NOTE(review): presumably those two files are written by the script
+# itself -- noeolw.sed is not shown here, confirm.
+noeolw::
+ $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed \
+ $(srcdir)/noeol.inp $(srcdir)/noeol.inp > $@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ $(CMP) $(srcdir)/$@.1good $@.1out
+ $(CMP) $(srcdir)/$@.2good $@.2out
+ @$(RM) $@.1out $@.2out $@.out
+
+# Checks both the standard output of subwrite.sed (against subwrt1.good)
+# and the file subwrite.wout (against subwrt2.good).
+# NOTE(review): subwrite.wout is presumably written by the script itself --
+# subwrite.sed is not shown here, confirm.
+subwrite::
+ $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.1out
+ $(CMP) $(srcdir)/subwrt1.good $@.1out
+ $(CMP) $(srcdir)/subwrt2.good $@.wout
+ @$(RM) $@.1out $@.wout
+
+# Runs the bsd.sh shell script, passing it the sed binary under test and
+# the name of the output file (bsd.out), then compares that file with
+# bsd.good.
+bsd::
+ $(SEDENV) sh $(srcdir)/$@.sh '$(SED)' bsd.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# Checks both the standard output of writeout.sed (against wrtout1.good)
+# and the file writeout.wout (against wrtout2.good).
+# NOTE(review): writeout.wout is presumably written by the script itself --
+# writeout.sed is not shown here, confirm.
+writeout::
+ $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp >$@.1out
+ $(CMP) $(srcdir)/wrtout1.good $@.1out
+ $(CMP) $(srcdir)/wrtout2.good $@.wout
+ @$(RM) $@.1out $@.wout
+
+# Make readin.in2 available in the current directory by copying it from
+# the source directory.
+# NOTE(review): when srcdir is `.', target and prerequisite name the same
+# file -- confirm make does nothing more than warn in that case.
+readin.in2: $(srcdir)/readin.in2
+ cat $(srcdir)/readin.in2 > $@
+
+# Needs readin.in2 in the current directory before readin.sed is run.
+# NOTE(review): presumably the script reads that file by relative name --
+# readin.sed is not shown here, confirm.
+readin:: readin.in2
+ $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp >$@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# eval.in2 is a copy of eval.inp created in the current directory.
+eval.in2: $(srcdir)/eval.inp
+ cat $(srcdir)/eval.inp > $@
+
+# Needs eval.in2 in the current directory before eval.sed is run.
+# NOTE(review): presumably the script refers to that file by relative
+# name -- eval.sed is not shown here, confirm.
+eval:: eval.in2
+ $(SEDENV) $(SED) -f $(srcdir)/$@.sed < $(srcdir)/$@.inp > $@.out
+ $(CMP) $(srcdir)/$@.good $@.out
+ @$(RM) $@.out
+
+# Three different scripts, all run with -n on the same input (binary.inp)
+# and all required to produce the same output (binary.good).
+binary binary2 binary3::
+ $(SEDENV) $(SED) -n -f $(srcdir)/$@.sed < $(srcdir)/binary.inp >$@.out
+ $(CMP) $(srcdir)/binary.good $@.out
+ @$(RM) $@.out
+
+#
+# Targets that exercise sed's command-line options
+#
+
+# The output of `sed --help' must be identical to what sed prints (stdout
+# and stderr combined) when run without any arguments. In both outputs the
+# first space-delimited word on line 1 is replaced by `sed' before
+# comparing. `|| :' ignores the exit status of the second pipeline.
+help::
+ $(SED) --help | $(SED) '1s/ [^ ]* / sed /' > $@.1out
+ $(SED) 2>&1 | $(SED) '1s/ [^ ]* / sed /' > $@.2out || :
+ $(CMP) $@.1out $@.2out
+ @$(RM) $@.1out $@.2out
+
+# Builds the expected output by replacing the @VERSION@ placeholder in
+# version.gin with $(VERSION), then compares it with the combined stdout
+# and stderr of `sed --version'. The placeholder is written as
+# @'VERSION'@ so that the literal text does not appear in this file.
+# NOTE(review): presumably that protects it from configure-style
+# substitution -- confirm.
+version::
+ $(SED) 's^@'VERSION'@^$(VERSION)^' $(srcdir)/version.gin > $@.good
+ $(SEDENV) $(SED) --version > $@.out 2>&1
+ $(CMP) $@.good $@.out
+ @$(RM) $@.good $@.out
+
+# Exercises the long option --file= by re-running the newjis test with it
+# (same script, input and expected output).
+file::
+ $(SEDENV) $(SED) --file=$(srcdir)/newjis.sed \
+ < $(srcdir)/newjis.inp > $@.out
+ $(CMP) $(srcdir)/newjis.good $@.out
+ @$(RM) $@.out
+
+# Exercises the long option --quiet by re-running the cv-vars test with it
+# in place of -n (same script, input and expected output).
+quiet::
+ $(SEDENV) $(SED) --quiet -f $(srcdir)/cv-vars.sed \
+ < $(srcdir)/cv-vars.inp > $@.out
+ $(CMP) $(srcdir)/cv-vars.good $@.out
+ @$(RM) $@.out
+
+# The following targets are not used in super sed builds (only GNU sed).
+#
+# Each test program is run from the current directory with its standard
+# output redirected to <program>.log, where the log name is the target
+# name with a trailing $(EXEEXT) replaced by `.log'. The first recipe line
+# prints the command with the log name already resolved; the second one
+# runs it without being echoed by make.
+
+bug-regex7$(EXEEXT) bug-regex8$(EXEEXT) bug-regex9$(EXEEXT) \
+bug-regex10$(EXEEXT) bug-regex11$(EXEEXT) bug-regex12$(EXEEXT) \
+bug-regex13$(EXEEXT) bug-regex14$(EXEEXT) bug-regex15$(EXEEXT) bug-regex16$(EXEEXT) \
+bug-regex21$(EXEEXT) runtests$(EXEEXT) runptests$(EXEEXT):
+ echo "$(SEDENV) ./$@ > `echo $@ | $(SED) s/$(EXEEXT)$$/.log/`"
+ @$(SEDENV) ./$@ > `echo $@ | $(SED) s/$(EXEEXT)$$/.log/`
+
+# Data-driven regex tests: each program is given its test-data file from
+# the source directory and its standard output is captured in a log file.
+tst-pcre$(EXEEXT):
+ $(SEDENV) ./tst-pcre $(srcdir)/PCRE.tests > tst-pcre.log
+
+tst-boost$(EXEEXT):
+ $(SEDENV) ./tst-boost $(srcdir)/BOOST.tests > tst-boost.log
+
+# Note that the log file is named tst-spencer.log, not tst-rxspencer.log.
+tst-rxspencer$(EXEEXT):
+ $(SEDENV) ./tst-rxspencer $(srcdir)/SPENCER.tests > tst-spencer.log
+
+# The following target is not used in GNU sed builds (only super-sed)
+#
+# Runs ./pcretest on the pcre1, pcre2 and pcre3 inputs with various
+# combinations of the -P and -p options, comparing every output file with
+# its expected counterpart. The two recipe lines starting with `#' (the -p
+# run on pcre1) are disabled.
+
+pcretest$(EXEEXT)::
+ $(SEDENV) ./pcretest $(srcdir)/pcre1.inp pcre1.out
+ $(CMP) $(srcdir)/pcre1.good pcre1.out
+ #$(SEDENV) ./pcretest -p $(srcdir)/pcre1.inp pcre1p.out
+ #$(CMP) $(srcdir)/pcre1p.good pcre1p.out
+ $(SEDENV) ./pcretest -P $(srcdir)/pcre2.inp pcre2.out
+ $(CMP) $(srcdir)/pcre2.good pcre2.out
+ $(SEDENV) ./pcretest -P -p $(srcdir)/pcre2.inp pcre2p.out
+ $(CMP) $(srcdir)/pcre2p.good pcre2p.out
+ $(SEDENV) ./pcretest $(srcdir)/pcre3.inp pcre3.out
+ $(CMP) $(srcdir)/pcre3.good pcre3.out
+ $(SEDENV) ./pcretest -p $(srcdir)/pcre3.inp pcre3p.out
+ $(CMP) $(srcdir)/pcre3p.good pcre3p.out
+ @$(RM) pcre*.out
+
+# The test-program targets are named after executables that the recipes
+# run from the current directory. Declaring them phony makes make run the
+# recipes regardless of whether those files exist or are up to date.
+.PHONY: \
+bug-regex7$(EXEEXT) bug-regex8$(EXEEXT) bug-regex9$(EXEEXT) \
+bug-regex10$(EXEEXT) bug-regex11$(EXEEXT) bug-regex12$(EXEEXT) \
+bug-regex13$(EXEEXT) bug-regex14$(EXEEXT) bug-regex15$(EXEEXT) bug-regex16$(EXEEXT) \
+bug-regex21$(EXEEXT) runtests$(EXEEXT) runptests$(EXEEXT) \
+tst-pcre$(EXEEXT) tst-boost$(EXEEXT) tst-rxspencer$(EXEEXT) pcretest$(EXEEXT)
diff --git a/testsuite/PCRE.tests b/testsuite/PCRE.tests
new file mode 100644
index 0000000..7ea5b9e
--- /dev/null
+++ b/testsuite/PCRE.tests
@@ -0,0 +1,2367 @@
+# PCRE version 4.4 21-August-2003
+
+# Tests taken from PCRE and modified to suit glibc regex.
+#
+# PCRE LICENCE
+# ------------
+#
+# PCRE is a library of functions to support regular expressions whose syntax
+# and semantics are as close as possible to those of the Perl 5 language.
+#
+# Written by: Philip Hazel <ph10@cam.ac.uk>
+#
+# University of Cambridge Computing Service,
+# Cambridge, England. Phone: +44 1223 334714.
+#
+# Copyright (c) 1997-2003 University of Cambridge
+#
+# Permission is granted to anyone to use this software for any purpose on any
+# computer system, and to redistribute it freely, subject to the following
+# restrictions:
+#
+# 1. This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# 2. The origin of this software must not be misrepresented, either by
+# explicit claim or by omission. In practice, this means that if you use
+# PCRE in software that you distribute to others, commercially or
+# otherwise, you must put a sentence like this
+#
+# Regular expression support is provided by the PCRE library package,
+# which is open source software, written by Philip Hazel, and copyright
+# by the University of Cambridge, England.
+#
+# somewhere reasonably visible in your documentation and in any relevant
+# files or online help data or similar. A reference to the ftp site for
+# the source, that is, to
+#
+# ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
+#
+# should also be given in the documentation. However, this condition is not
+# intended to apply to whole chains of software. If package A includes PCRE,
+# it must acknowledge it, but if package B is software that includes package
+# A, the condition is not imposed on package B (unless it uses PCRE
+# independently).
+#
+# 3. Altered versions must be plainly marked as such, and must not be
+# misrepresented as being the original software.
+#
+# 4. If PCRE is embedded in any software that is released under the GNU
+# General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
+# then the terms of that licence shall supersede any condition above with
+# which it is incompatible.
+#
+# The documentation for PCRE, supplied in the "doc" directory, is distributed
+# under the same terms as the software itself.
+#
+# End
+#
+
+/the quick brown fox/
+ the quick brown fox
+ 0: the quick brown fox
+ The quick brown FOX
+No match
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+ What do you know about THE QUICK BROWN FOX?
+No match
+
+/The quick brown fox/i
+ the quick brown fox
+ 0: the quick brown fox
+ The quick brown FOX
+ 0: The quick brown FOX
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+ What do you know about THE QUICK BROWN FOX?
+ 0: THE QUICK BROWN FOX
+
+/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ aabxyzpqrrrabbxyyyypqAzz
+ 0: aabxyzpqrrrabbxyyyypqAzz
+ aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+ aabcxyzpqrrrabbxyyyypqAzz
+ 0: aabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypAzz
+ 0: aaabcxyzpqrrrabbxyyyypAzz
+ aaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ aaaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzpqrrrabbxyyyypqAzz
+ abxyzzpqrrrabbxyyyypqAzz
+ 0: abxyzzpqrrrabbxyyyypqAzz
+ aabxyzzzpqrrrabbxyyyypqAzz
+ 0: aabxyzzzpqrrrabbxyyyypqAzz
+ aaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabxyzzzzpqrrrabbxyyyypqAzz
+ aaaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzzzzpqrrrabbxyyyypqAzz
+ abcxyzzpqrrrabbxyyyypqAzz
+ 0: abcxyzzpqrrrabbxyyyypqAzz
+ aabcxyzzzpqrrrabbxyyyypqAzz
+ 0: aabcxyzzzpqrrrabbxyyyypqAzz
+ aaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypABzz
+ 0: aaabcxyzpqrrrabbxyyyypABzz
+ aaabcxyzpqrrrabbxyyyypABBzz
+ 0: aaabcxyzpqrrrabbxyyyypABBzz
+ >>>aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ >aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ >>>>abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+ *** Failers
+No match
+ abxyzpqrrabbxyyyypqAzz
+No match
+ abxyzpqrrrrabbxyyyypqAzz
+No match
+ abxyzpqrrrabxyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyypqAzz
+No match
+ aaabcxyzpqrrrabbxyyyypqqqqqqqAzz
+No match
+
+/^(abc){1,2}zz/
+ abczz
+ 0: abczz
+ 1: abc
+ abcabczz
+ 0: abcabczz
+ 1: abc
+ *** Failers
+No match
+ zz
+No match
+ abcabcabczz
+No match
+ >>abczz
+No match
+
+/^(b+|a){1,2}c/
+ bc
+ 0: bc
+ 1: b
+ bbc
+ 0: bbc
+ 1: bb
+ bbbc
+ 0: bbbc
+ 1: bbb
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ aac
+ 0: aac
+ 1: a
+ abbbbbbbbbbbc
+ 0: abbbbbbbbbbbc
+ 1: bbbbbbbbbbb
+ bbbbbbbbbbbac
+ 0: bbbbbbbbbbbac
+ 1: a
+ *** Failers
+No match
+ aaac
+No match
+ abbbbbbbbbbbac
+No match
+
+/^[]cde]/
+ ]thing
+ 0: ]
+ cthing
+ 0: c
+ dthing
+ 0: d
+ ething
+ 0: e
+ *** Failers
+No match
+ athing
+No match
+ fthing
+No match
+
+/^[^]cde]/
+ athing
+ 0: a
+ fthing
+ 0: f
+ *** Failers
+ 0: *
+ ]thing
+No match
+ cthing
+No match
+ dthing
+No match
+ ething
+No match
+
+/^[0-9]+$/
+ 0
+ 0: 0
+ 1
+ 0: 1
+ 2
+ 0: 2
+ 3
+ 0: 3
+ 4
+ 0: 4
+ 5
+ 0: 5
+ 6
+ 0: 6
+ 7
+ 0: 7
+ 8
+ 0: 8
+ 9
+ 0: 9
+ 10
+ 0: 10
+ 100
+ 0: 100
+ *** Failers
+No match
+ abc
+No match
+
+/^.*nter/
+ enter
+ 0: enter
+ inter
+ 0: inter
+ uponter
+ 0: uponter
+
+/^xxx[0-9]+$/
+ xxx0
+ 0: xxx0
+ xxx1234
+ 0: xxx1234
+ *** Failers
+No match
+ xxx
+No match
+
+/^.+[0-9][0-9][0-9]$/
+ x123
+ 0: x123
+ xx123
+ 0: xx123
+ 123456
+ 0: 123456
+ *** Failers
+No match
+ 123
+No match
+ x1234
+ 0: x1234
+
+/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/
+ abc!pqr=apquxz.ixr.zzz.ac.uk
+ 0: abc!pqr=apquxz.ixr.zzz.ac.uk
+ 1: abc
+ 2: pqr
+ *** Failers
+No match
+ !pqr=apquxz.ixr.zzz.ac.uk
+No match
+ abc!=apquxz.ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz:ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz.ixr.zzz.ac.ukk
+No match
+
+/:/
+ Well, we need a colon: somewhere
+ 0: :
+ *** Fail if we don't
+No match
+
+/([0-9a-f:]+)$/i
+ 0abc
+ 0: 0abc
+ 1: 0abc
+ abc
+ 0: abc
+ 1: abc
+ fed
+ 0: fed
+ 1: fed
+ E
+ 0: E
+ 1: E
+ ::
+ 0: ::
+ 1: ::
+ 5f03:12C0::932e
+ 0: 5f03:12C0::932e
+ 1: 5f03:12C0::932e
+ fed def
+ 0: def
+ 1: def
+ Any old stuff
+ 0: ff
+ 1: ff
+ *** Failers
+No match
+ 0zzz
+No match
+ gzzz
+No match
+ Any old rubbish
+No match
+
+/^.*\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$/
+ .1.2.3
+ 0: .1.2.3
+ 1: 1
+ 2: 2
+ 3: 3
+ A.12.123.0
+ 0: A.12.123.0
+ 1: 12
+ 2: 123
+ 3: 0
+ *** Failers
+No match
+ .1.2.3333
+No match
+ 1.2.3
+No match
+ 1234.2.3
+No match
+
+/^([0-9]+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
+ 1 IN SOA non-sp1 non-sp2(
+ 0: 1 IN SOA non-sp1 non-sp2(
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+ 1 IN SOA non-sp1 non-sp2 (
+ 0: 1 IN SOA non-sp1 non-sp2 (
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+ *** Failers
+No match
+ 1IN SOA non-sp1 non-sp2(
+No match
+
+/^[a-zA-Z0-9][a-zA-Z0-9-]*(\.[a-zA-Z0-9][a-zA-z0-9-]*)*\.$/
+ a.
+ 0: a.
+ Z.
+ 0: Z.
+ 2.
+ 0: 2.
+ ab-c.pq-r.
+ 0: ab-c.pq-r.
+ 1: .pq-r
+ sxk.zzz.ac.uk.
+ 0: sxk.zzz.ac.uk.
+ 1: .uk
+ x-.y-.
+ 0: x-.y-.
+ 1: .y-
+ *** Failers
+No match
+ -abc.peq.
+No match
+
+/^\*\.[a-z]([a-z0-9-]*[a-z0-9]+)?(\.[a-z]([a-z0-9-]*[a-z0-9]+)?)*$/
+ *.a
+ 0: *.a
+ *.b0-a
+ 0: *.b0-a
+ 1: 0-a
+ *.c3-b.c
+ 0: *.c3-b.c
+ 1: 3-b
+ 2: .c
+ *.c-a.b-c
+ 0: *.c-a.b-c
+ 1: -a
+ 2: .b-c
+ 3: -c
+ *** Failers
+No match
+ *.0
+No match
+ *.a-
+No match
+ *.a-b.c-
+No match
+ *.c-a.0-c
+No match
+
+/^[0-9a-f](\.[0-9a-f])*$/i
+ a.b.c.d
+ 0: a.b.c.d
+ 1: .d
+ A.B.C.D
+ 0: A.B.C.D
+ 1: .D
+ a.b.c.1.2.3.C
+ 0: a.b.c.1.2.3.C
+ 1: .C
+
+/^".*"\s*(;.*)?$/
+ "1234"
+ 0: "1234"
+ "abcd" ;
+ 0: "abcd" ;
+ 1: ;
+ "" ; rhubarb
+ 0: "" ; rhubarb
+ 1: ; rhubarb
+ *** Failers
+No match
+ "1234" : things
+No match
+
+/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/
+ abcdefhijklm
+ 0: abcdefhijklm
+ 1: abc
+ 2: bc
+ 3: c
+ 4: def
+ 5: ef
+ 6: f
+ 7: hij
+ 8: ij
+ 9: j
+10: klm
+11: lm
+12: m
+
+/^a*\w/
+ z
+ 0: z
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ a
+ 0: a
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ a+
+ 0: a
+ aa+
+ 0: aa
+
+/^a+\w/
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ aa+
+ 0: aa
+
+/^[0-9]{8}\w{2,}/
+ 1234567890
+ 0: 1234567890
+ 12345678ab
+ 0: 12345678ab
+ 12345678__
+ 0: 12345678__
+ *** Failers
+No match
+ 1234567
+No match
+
+/^[aeiou0-9]{4,5}$/
+ uoie
+ 0: uoie
+ 1234
+ 0: 1234
+ 12345
+ 0: 12345
+ aaaaa
+ 0: aaaaa
+ *** Failers
+No match
+ 123456
+No match
+
+/\`(abc|def)=(\1){2,3}\'/
+ abc=abcabc
+ 0: abc=abcabc
+ 1: abc
+ 2: abc
+ def=defdefdef
+ 0: def=defdefdef
+ 1: def
+ 2: def
+ *** Failers
+No match
+ abc=defdef
+No match
+
+/(cat(a(ract|tonic)|erpillar)) \1()2(3)/
+ cataract cataract23
+ 0: cataract cataract23
+ 1: cataract
+ 2: aract
+ 3: ract
+ 4:
+ 5: 3
+ catatonic catatonic23
+ 0: catatonic catatonic23
+ 1: catatonic
+ 2: atonic
+ 3: tonic
+ 4:
+ 5: 3
+ caterpillar caterpillar23
+ 0: caterpillar caterpillar23
+ 1: caterpillar
+ 2: erpillar
+ 3: <unset>
+ 4:
+ 5: 3
+
+
+/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: abcd
+
+/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}[0-9]{1,2}\s+[0-9][0-9]:[0-9][0-9]/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: Sep
+ From abcd Mon Sep 1 12:33:02 1997
+ 0: From abcd Mon Sep 1 12:33
+ 1: Sep
+ *** Failers
+No match
+ From abcd Sep 01 12:33:02 1997
+No match
+
+/^(a)\1{2,3}(.)/
+ aaab
+ 0: aaab
+ 1: a
+ 2: b
+ aaaab
+ 0: aaaab
+ 1: a
+ 2: b
+ aaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+ aaaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+
+/^[ab]{1,3}(ab*|b)/
+ aabbbbb
+ 0: aabbbbb
+ 1: abbbbb
+
+/^(cow|)\1(bell)/
+ cowcowbell
+ 0: cowcowbell
+ 1: cow
+ 2: bell
+ bell
+ 0: bell
+ 1:
+ 2: bell
+ *** Failers
+No match
+ cowbell
+No match
+
+/^(a|)\1+b/
+ aab
+ 0: aab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+
+/^(a|)\1{2}b/
+ aaab
+ 0: aaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+ aab
+No match
+ aaaab
+No match
+
+/^(a|)\1{2,3}b/
+ aaab
+ 0: aaab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+ aab
+No match
+ aaaaab
+No match
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+ abbbc
+ 0: abbbc
+ abbc
+ 0: abbc
+ *** Failers
+No match
+ abc
+No match
+ abbbbbc
+No match
+
+/([^.]*)\.([^:]*):[T ]+(.*)/
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[T ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[t ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/^abc$/
+ abc
+ 0: abc
+ *** Failers
+No match
+
+/[-az]+/
+ az-
+ 0: az-
+ *** Failers
+ 0: a
+ b
+No match
+
+/[az-]+/
+ za-
+ 0: za-
+ *** Failers
+ 0: a
+ b
+No match
+
+/[a-z]+/
+ abcdxyz
+ 0: abcdxyz
+
+/[0-9-]+/
+ 12-34
+ 0: 12-34
+ *** Failers
+No match
+ aaa
+No match
+
+/(abc)\1/i
+ abcabc
+ 0: abcabc
+ 1: abc
+ ABCabc
+ 0: ABCabc
+ 1: ABC
+ abcABC
+ 0: abcABC
+ 1: abc
+
+/a{0}bc/
+ bc
+ 0: bc
+
+/^([^a])([^b])([^c]*)([^d]{3,4})/
+ baNOTccccd
+ 0: baNOTcccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: cccc
+ baNOTcccd
+ 0: baNOTccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: ccc
+ baNOTccd
+ 0: baNOTcc
+ 1: b
+ 2: a
+ 3: NO
+ 4: Tcc
+ bacccd
+ 0: baccc
+ 1: b
+ 2: a
+ 3:
+ 4: ccc
+ *** Failers
+ 0: *** Failers
+ 1: *
+ 2: *
+ 3: * Fail
+ 4: ers
+ anything
+No match
+ baccd
+No match
+
+/[^a]/
+ Abc
+ 0: A
+
+/[^a]/i
+ Abc
+ 0: b
+
+/[^a]+/
+ AAAaAbc
+ 0: AAA
+
+/[^a]+/i
+ AAAaAbc
+ 0: bc
+
+/[^k]$/
+ abc
+ 0: c
+ *** Failers
+ 0: s
+ abk
+No match
+
+/[^k]{2,3}$/
+ abc
+ 0: abc
+ kbc
+ 0: bc
+ kabc
+ 0: abc
+ *** Failers
+ 0: ers
+ abk
+No match
+ akb
+No match
+ akk
+No match
+
+/^[0-9]{8,}@.+[^k]$/
+ 12345678@a.b.c.d
+ 0: 12345678@a.b.c.d
+ 123456789@x.y.z
+ 0: 123456789@x.y.z
+ *** Failers
+No match
+ 12345678@x.y.uk
+No match
+ 1234567@a.b.c.d
+No match
+
+/(a)\1{8,}/
+ aaaaaaaaa
+ 0: aaaaaaaaa
+ 1: a
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: a
+ *** Failers
+No match
+ aaaaaaa
+No match
+
+/[^a]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^a]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+/[^az]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^az]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+/P[^*]TAIRE[^*]{1,6}LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/P[^*]TAIRE[^*]{1,}LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/(\.[0-9][0-9][1-9]?)[0-9]+/
+ 1.230003938
+ 0: .230003938
+ 1: .23
+ 1.875000282
+ 0: .875000282
+ 1: .875
+ 1.235
+ 0: .235
+ 1: .23
+
+/\b(foo)\s+(\w+)/i
+ Food is on the foo table
+ 0: foo table
+ 1: foo
+ 2: table
+
+/foo(.*)bar/
+ The food is under the bar in the barn.
+ 0: food is under the bar in the bar
+ 1: d is under the bar in the
+
+/(.*)([0-9]*)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 53147
+ 2:
+
+/(.*)([0-9]+)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*)([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*)\b([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/(.*[^0-9])([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]][[:digit:]][[:digit:]]/
+ 01/01/2000
+ 0: 01/01/2000
+
+/^(a){0,0}/
+ bcd
+ 0:
+ abc
+ 0:
+ aab
+ 0:
+
+/^(a){0,1}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+
+/^(a){0,2}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+
+/^(a){0,3}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+
+/^(a){0,}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+
+/^(a){1,1}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+
+/^(a){1,2}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+
+/^(a){1,3}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+
+/^(a){1,}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+
+/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/
+ 123456654321
+ 0: 123456654321
+
+/^[[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]]/
+ 123456654321
+ 0: 123456654321
+
+/^[abc]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^[a-c]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^(a|b|c){12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+ 1: c
+
+/^[abcdefghijklmnopqrstuvwxy0123456789]/
+ n
+ 0: n
+ *** Failers
+No match
+ z
+No match
+
+/abcde{0,0}/
+ abcd
+ 0: abcd
+ *** Failers
+No match
+ abce
+No match
+
+/ab[cd]{0,0}e/
+ abe
+ 0: abe
+ *** Failers
+No match
+ abcde
+No match
+
+/ab(c){0,0}d/
+ abd
+ 0: abd
+ *** Failers
+No match
+ abcd
+No match
+
+/a(b*)/
+ a
+ 0: a
+ 1:
+ ab
+ 0: ab
+ 1: b
+ abbbb
+ 0: abbbb
+ 1: bbbb
+ *** Failers
+ 0: a
+ 1:
+ bbbbb
+No match
+
+/ab[0-9]{0}e/
+ abe
+ 0: abe
+ *** Failers
+No match
+ ab1e
+No match
+
+/(A|B)*CD/
+ CD
+ 0: CD
+
+/(AB)*\1/
+ ABABAB
+ 0: ABABAB
+ 1: AB
+
+/([0-9]+)(\w)/
+ 12345a
+ 0: 12345a
+ 1: 12345
+ 2: a
+ 12345+
+ 0: 12345
+ 1: 1234
+ 2: 5
+
+/(abc|)+/
+ abc
+ 0: abc
+ 1: abc
+ abcabc
+ 0: abcabc
+ 1: abc
+ abcabcabc
+ 0: abcabcabc
+ 1: abc
+ xyz
+ 0:
+ 1:
+
+/([a]*)*/
+ a
+ 0: a
+ 1: a
+ aaaaa
+ 0: aaaaa
+ 1: aaaaa
+
+/([ab]*)*/
+ a
+ 0: a
+ 1: a
+ b
+ 0: b
+ 1: b
+ ababab
+ 0: ababab
+ 1: ababab
+ aaaabcde
+ 0: aaaab
+ 1: aaaab
+ bbbb
+ 0: bbbb
+ 1: bbbb
+
+/([^a]*)*/
+ b
+ 0: b
+ 1: b
+ bbbb
+ 0: bbbb
+ 1: bbbb
+ aaa
+ 0:
+
+/([^ab]*)*/
+ cccc
+ 0: cccc
+ 1: cccc
+ abab
+ 0:
+
+/abc/
+ abc
+ 0: abc
+ xabcy
+ 0: abc
+ ababc
+ 0: abc
+ *** Failers
+No match
+ xbc
+No match
+ axc
+No match
+ abx
+No match
+
+/ab*c/
+ abc
+ 0: abc
+
+/ab*bc/
+ abc
+ 0: abc
+ abbc
+ 0: abbc
+ abbbbc
+ 0: abbbbc
+
+/.{1}/
+ abbbbc
+ 0: a
+
+/.{3,4}/
+ abbbbc
+ 0: abbb
+
+/ab{0,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab+bc/
+ abbc
+ 0: abbc
+ *** Failers
+No match
+ abc
+No match
+ abq
+No match
+
+/ab+bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{3,4}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{4,5}bc/
+ *** Failers
+No match
+ abq
+No match
+ abbbbc
+No match
+
+/ab?bc/
+ abbc
+ 0: abbc
+ abc
+ 0: abc
+
+/ab{0,1}bc/
+ abc
+ 0: abc
+
+/ab?c/
+ abc
+ 0: abc
+
+/ab{0,1}c/
+ abc
+ 0: abc
+
+/^abc$/
+ abc
+ 0: abc
+ *** Failers
+No match
+ abbbbc
+No match
+ abcc
+No match
+
+/^abc/
+ abcc
+ 0: abc
+
+/abc$/
+ aabc
+ 0: abc
+ *** Failers
+No match
+ aabc
+ 0: abc
+ aabcd
+No match
+
+/^/
+ abc
+ 0:
+
+/$/
+ abc
+ 0:
+
+/a.c/
+ abc
+ 0: abc
+ axc
+ 0: axc
+
+/a.*c/
+ axyzc
+ 0: axyzc
+
+/a[bc]d/
+ abd
+ 0: abd
+ *** Failers
+No match
+ axyzd
+No match
+ abc
+No match
+
+/a[b-d]e/
+ ace
+ 0: ace
+
+/a[b-d]/
+ aac
+ 0: ac
+
+/a[-b]/
+ a-
+ 0: a-
+
+/a[b-]/
+ a-
+ 0: a-
+
+/a[]]b/
+ a]b
+ 0: a]b
+
+/a[^bc]d/
+ aed
+ 0: aed
+ *** Failers
+No match
+ abd
+No match
+ abd
+No match
+
+/a[^-b]c/
+ adc
+ 0: adc
+
+/a[^]b]c/
+ adc
+ 0: adc
+ *** Failers
+No match
+ a-c
+ 0: a-c
+ a]c
+No match
+
+/\ba\b/
+ a-
+ 0: a
+ -a
+ 0: a
+ -a-
+ 0: a
+
+/\by\b/
+ *** Failers
+No match
+ xy
+No match
+ yz
+No match
+ xyz
+No match
+
+/\Ba\B/
+ *** Failers
+ 0: a
+ a-
+No match
+ -a
+No match
+ -a-
+No match
+
+/\By\b/
+ xy
+ 0: y
+
+/\by\B/
+ yz
+ 0: y
+
+/\By\B/
+ xyz
+ 0: y
+
+/\w/
+ a
+ 0: a
+
+/\W/
+ -
+ 0: -
+ *** Failers
+ 0: *
+ -
+ 0: -
+ a
+No match
+
+/a\sb/
+ a b
+ 0: a b
+
+/a\Sb/
+ a-b
+ 0: a-b
+ *** Failers
+No match
+ a-b
+ 0: a-b
+ a b
+No match
+
+/[0-9]/
+ 1
+ 0: 1
+
+/[^0-9]/
+ -
+ 0: -
+ *** Failers
+ 0: *
+ -
+ 0: -
+ 1
+No match
+
+/ab|cd/
+ abc
+ 0: ab
+ abcd
+ 0: ab
+
+/()ef/
+ def
+ 0: ef
+ 1:
+
+/a\(b/
+ a(b
+ 0: a(b
+
+/a\(*b/
+ ab
+ 0: ab
+ a((b
+ 0: a((b
+
+/((a))/
+ abc
+ 0: a
+ 1: a
+ 2: a
+
+/(a)b(c)/
+ abc
+ 0: abc
+ 1: a
+ 2: c
+
+/a+b+c/
+ aabbabc
+ 0: abc
+
+/a{1,}b{1,}c/
+ aabbabc
+ 0: abc
+
+/(a+|b)*/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){0,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)+/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){1,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)?/
+ ab
+ 0: a
+ 1: a
+
+/(a+|b){0,1}/
+ ab
+ 0: a
+ 1: a
+
+/[^ab]*/
+ cde
+ 0: cde
+
+/abc/
+ *** Failers
+No match
+ b
+No match
+
+
+/a*/
+
+
+/([abc])*d/
+ abbbcd
+ 0: abbbcd
+ 1: c
+
+/([abc])*bcd/
+ abcd
+ 0: abcd
+ 1: a
+
+/a|b|c|d|e/
+ e
+ 0: e
+
+/(a|b|c|d|e)f/
+ ef
+ 0: ef
+ 1: e
+
+/abcd*efg/
+ abcdefg
+ 0: abcdefg
+
+/ab*/
+ xabyabbbz
+ 0: ab
+ xayabbbz
+ 0: a
+
+/(ab|cd)e/
+ abcde
+ 0: cde
+ 1: cd
+
+/[abhgefdc]ij/
+ hij
+ 0: hij
+
+/(abc|)ef/
+ abcdef
+ 0: ef
+ 1:
+
+/(a|b)c*d/
+ abcd
+ 0: bcd
+ 1: b
+
+/(ab|ab*)bc/
+ abc
+ 0: abc
+ 1: a
+
+/a([bc]*)c*/
+ abc
+ 0: abc
+ 1: bc
+
+/a([bc]*)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]+)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]*)(c+d)/
+ abcd
+ 0: abcd
+ 1: b
+ 2: cd
+
+/a[bcd]*dcdcde/
+ adcdcde
+ 0: adcdcde
+
+/a[bcd]+dcdcde/
+ *** Failers
+No match
+ abcde
+No match
+ adcdcde
+No match
+
+/(ab|a)b*c/
+ abc
+ 0: abc
+ 1: ab
+
+/((a)(b)c)(d)/
+ abcd
+ 0: abcd
+ 1: abc
+ 2: a
+ 3: b
+ 4: d
+
+/[a-zA-Z_][a-zA-Z0-9_]*/
+ alpha
+ 0: alpha
+
+/^a(bc+|b[eh])g|.h$/
+ abh
+ 0: bh
+
+/(bc+d$|ef*g.|h?i(j|k))/
+ effgz
+ 0: effgz
+ 1: effgz
+ ij
+ 0: ij
+ 1: ij
+ 2: j
+ reffgz
+ 0: effgz
+ 1: effgz
+ *** Failers
+No match
+ effg
+No match
+ bcdd
+No match
+
+/((((((((((a))))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/((((((((((a))))))))))\9/
+ aa
+ 0: aa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/(((((((((a)))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+
+/multiple words of text/
+ *** Failers
+No match
+ aa
+No match
+ uh-uh
+No match
+
+/multiple words/
+ multiple words, yeah
+ 0: multiple words
+
+/(.*)c(.*)/
+ abcde
+ 0: abcde
+ 1: ab
+ 2: de
+
+/\((.*), (.*)\)/
+ (a, b)
+ 0: (a, b)
+ 1: a
+ 2: b
+
+/abcd/
+ abcd
+ 0: abcd
+
+/a(bc)d/
+ abcd
+ 0: abcd
+ 1: bc
+
+/a[-]?c/
+ ac
+ 0: ac
+
+/(abc)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/([a-c]*)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/(a)|\1/
+ a
+ 0: a
+ 1: a
+ *** Failers
+ 0: a
+ 1: a
+ ab
+ 0: a
+ 1: a
+ x
+No match
+
+/abc/i
+ ABC
+ 0: ABC
+ XABCY
+ 0: ABC
+ ABABC
+ 0: ABC
+ *** Failers
+No match
+ aaxabxbaxbbx
+No match
+ XBC
+No match
+ AXC
+No match
+ ABX
+No match
+
+/ab*c/i
+ ABC
+ 0: ABC
+
+/ab*bc/i
+ ABC
+ 0: ABC
+ ABBC
+ 0: ABBC
+
+/ab+bc/i
+ *** Failers
+No match
+ ABC
+No match
+ ABQ
+No match
+
+/ab+bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/^abc$/i
+ ABC
+ 0: ABC
+ *** Failers
+No match
+ ABBBBC
+No match
+ ABCC
+No match
+
+/^abc/i
+ ABCC
+ 0: ABC
+
+/abc$/i
+ AABC
+ 0: ABC
+
+/^/i
+ ABC
+ 0:
+
+/$/i
+ ABC
+ 0:
+
+/a.c/i
+ ABC
+ 0: ABC
+ AXC
+ 0: AXC
+
+/a.*c/i
+ *** Failers
+No match
+ AABC
+ 0: AABC
+ AXYZD
+No match
+
+/a[bc]d/i
+ ABD
+ 0: ABD
+
+/a[b-d]e/i
+ ACE
+ 0: ACE
+ *** Failers
+No match
+ ABC
+No match
+ ABD
+No match
+
+/a[b-d]/i
+ AAC
+ 0: AC
+
+/a[-b]/i
+ A-
+ 0: A-
+
+/a[b-]/i
+ A-
+ 0: A-
+
+/a[]]b/i
+ A]B
+ 0: A]B
+
+/a[^bc]d/i
+ AED
+ 0: AED
+
+/a[^-b]c/i
+ ADC
+ 0: ADC
+ *** Failers
+No match
+ ABD
+No match
+ A-C
+No match
+
+/a[^]b]c/i
+ ADC
+ 0: ADC
+
+/ab|cd/i
+ ABC
+ 0: AB
+ ABCD
+ 0: AB
+
+/()ef/i
+ DEF
+ 0: EF
+ 1:
+
+/$b/i
+ *** Failers
+No match
+ A]C
+No match
+ B
+No match
+
+/a\(b/i
+ A(B
+ 0: A(B
+
+/a\(*b/i
+ AB
+ 0: AB
+ A((B
+ 0: A((B
+
+/((a))/i
+ ABC
+ 0: A
+ 1: A
+ 2: A
+
+/(a)b(c)/i
+ ABC
+ 0: ABC
+ 1: A
+ 2: C
+
+/a+b+c/i
+ AABBABC
+ 0: ABC
+
+/a{1,}b{1,}c/i
+ AABBABC
+ 0: ABC
+
+/(a+|b)*/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){0,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)+/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){1,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)?/i
+ AB
+ 0: A
+ 1: A
+
+/(a+|b){0,1}/i
+ AB
+ 0: A
+ 1: A
+
+/[^ab]*/i
+ CDE
+ 0: CDE
+
+/([abc])*d/i
+ ABBBCD
+ 0: ABBBCD
+ 1: C
+
+/([abc])*bcd/i
+ ABCD
+ 0: ABCD
+ 1: A
+
+/a|b|c|d|e/i
+ E
+ 0: E
+
+/(a|b|c|d|e)f/i
+ EF
+ 0: EF
+ 1: E
+
+/abcd*efg/i
+ ABCDEFG
+ 0: ABCDEFG
+
+/ab*/i
+ XABYABBBZ
+ 0: AB
+ XAYABBBZ
+ 0: A
+
+/(ab|cd)e/i
+ ABCDE
+ 0: CDE
+ 1: CD
+
+/[abhgefdc]ij/i
+ HIJ
+ 0: HIJ
+
+/^(ab|cd)e/i
+ ABCDE
+No match
+
+/(abc|)ef/i
+ ABCDEF
+ 0: EF
+ 1:
+
+/(a|b)c*d/i
+ ABCD
+ 0: BCD
+ 1: B
+
+/(ab|ab*)bc/i
+ ABC
+ 0: ABC
+ 1: A
+
+/a([bc]*)c*/i
+ ABC
+ 0: ABC
+ 1: BC
+
+/a([bc]*)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]+)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]*)(c+d)/i
+ ABCD
+ 0: ABCD
+ 1: B
+ 2: CD
+
+/a[bcd]*dcdcde/i
+ ADCDCDE
+ 0: ADCDCDE
+
+/a[bcd]+dcdcde/i
+
+/(ab|a)b*c/i
+ ABC
+ 0: ABC
+ 1: AB
+
+/((a)(b)c)(d)/i
+ ABCD
+ 0: ABCD
+ 1: ABC
+ 2: A
+ 3: B
+ 4: D
+
+/[a-zA-Z_][a-zA-Z0-9_]*/i
+ ALPHA
+ 0: ALPHA
+
+/^a(bc+|b[eh])g|.h$/i
+ ABH
+ 0: BH
+
+/(bc+d$|ef*g.|h?i(j|k))/i
+ EFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+ IJ
+ 0: IJ
+ 1: IJ
+ 2: J
+ REFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+ *** Failers
+No match
+ ADCDCDE
+No match
+ EFFG
+No match
+ BCDD
+No match
+
+/((((((((((a))))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/((((((((((a))))))))))\9/i
+ AA
+ 0: AA
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/(((((((((a)))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+
+/multiple words of text/i
+ *** Failers
+No match
+ AA
+No match
+ UH-UH
+No match
+
+/multiple words/i
+ MULTIPLE WORDS, YEAH
+ 0: MULTIPLE WORDS
+
+/(.*)c(.*)/i
+ ABCDE
+ 0: ABCDE
+ 1: AB
+ 2: DE
+
+/\((.*), (.*)\)/i
+ (A, B)
+ 0: (A, B)
+ 1: A
+ 2: B
+
+/abcd/i
+ ABCD
+ 0: ABCD
+
+/a(bc)d/i
+ ABCD
+ 0: ABCD
+ 1: BC
+
+/a[-]?c/i
+ AC
+ 0: AC
+
+/(abc)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/([a-c]*)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/((foo)|(bar))*/
+ foobar
+ 0: foobar
+ 1: bar
+ 2: foo
+ 3: bar
+
+/^(.+)?B/
+ AB
+ 0: AB
+ 1: A
+
+/^([^a-z])|(\^)$/
+ .
+ 0: .
+ 1: .
+
+/^[<>]&/
+ <&OUT
+ 0: <&
+
+/^(){3,5}/
+ abc
+ 0:
+ 1:
+
+/^(a+)*ax/
+ aax
+ 0: aax
+ 1: a
+
+/^((a|b)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/^((a|bc)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/(a|x)*ab/
+ cab
+ 0: ab
+
+/(a)*ab/
+ cab
+ 0: ab
+
+/(ab)[0-9]\1/i
+ Ab4ab
+ 0: Ab4ab
+ 1: Ab
+ ab4Ab
+ 0: ab4Ab
+ 1: ab
+
+/foo\w*[0-9]{4}baz/
+ foobar1234baz
+ 0: foobar1234baz
+
+/(\w+:)+/
+ one:
+ 0: one:
+ 1: one:
+
+/((\w|:)+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: :
+ 3: abcd
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/(a*)b+/
+ caab
+ 0: aab
+ 1: aa
+
+/((\w|:)+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: :
+ 3: abcd
+ *** Failers
+ 0: Failers
+ 1: <unset>
+ 2: <unset>
+ 3: Failers
+ abcd:
+No match
+ abcd:
+No match
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/((Z)+|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2: Z
+
+/(Z()|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+
+/(Z(())|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+ 3:
+
+/(.*)[0-9]+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+
+/((.*))[0-9]+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ 2: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+ 2: bc
diff --git a/testsuite/SPENCER.tests b/testsuite/SPENCER.tests
new file mode 100644
index 0000000..f125523
--- /dev/null
+++ b/testsuite/SPENCER.tests
@@ -0,0 +1,528 @@
+# Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
+# This software is not subject to any license of the American Telephone
+# and Telegraph Company or of the Regents of the University of California.
+#
+# Permission is granted to anyone to use this software for any purpose on
+# any computer system, and to alter it and redistribute it, subject
+# to the following restrictions:
+#
+# 1. The author is not responsible for the consequences of use of this
+# software, no matter how awful, even if they arise from flaws in it.
+#
+# 2. The origin of this software must not be misrepresented, either by
+# explicit claim or by omission. Since few users ever read sources,
+# credits must appear in the documentation.
+#
+# 3. Altered versions must be plainly marked as such, and must not be
+# misrepresented as being the original software. Since few users
+# ever read sources, credits must appear in the documentation.
+#
+# 4. This notice may not be removed or altered.
+#
+# regular expression test set
+# Lines are at least three fields, separated by one or more tabs. "" stands
+# for an empty field. First field is an RE. Second field is flags. If
+# C flag given, regcomp() is expected to fail, and the third field is the
+# error name (minus the leading REG_).
+#
+# Otherwise it is expected to succeed, and the third field is the string to
+# try matching it against. If there is no fourth field, the match is
+# expected to fail. If there is a fourth field, it is the substring that
+# the RE is expected to match. If there is a fifth field, it is a comma-
+# separated list of what the subexpressions should match, with - indicating
+# no match for that one. In both the fourth and fifth fields, a (sub)field
+# starting with @ indicates that the (sub)expression is expected to match
+# a null string followed by the stuff after the @; this provides a way to
+# test where null strings match. The character `N' in REs and strings
+# is newline, `S' is space, `T' is tab, `Z' is NUL.
+#
+# The full list of flags:
+# - placeholder, does nothing
+# b RE is a BRE, not an ERE
+# & try it as both an ERE and a BRE
+# C regcomp() error expected, third field is error name
+# i REG_ICASE
+# m ("mundane") REG_NOSPEC
+# s REG_NOSUB (not really testable)
+# n REG_NEWLINE
+# ^ REG_NOTBOL
+# $ REG_NOTEOL
+# # REG_STARTEND (see below)
+# p REG_PEND
+#
+# For REG_STARTEND, the start/end offsets are those of the substring
+# enclosed in ().
+
+# basics
+a & a a
+abc & abc abc
+abc|de - abc abc
+a|b|c - abc a
+
+# parentheses and perversions thereof
+a(b)c - abc abc
+a\(b\)c b abc abc
+a( C EPAREN
+a( b a( a(
+a\( - a( a(
+a\( bC EPAREN
+a\(b bC EPAREN
+a(b C EPAREN
+a(b b a(b a(b
+# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
+a) - a) a)
+) - ) )
+# end gagging (in a just world, those *should* give EPAREN)
+a) b a) a)
+a\) bC EPAREN
+\) bC EPAREN
+a()b - ab ab
+a\(\)b b ab ab
+
+# anchoring and REG_NEWLINE
+^abc$ & abc abc
+a^b - a^b
+a^b b a^b a^b
+a$b - a$b
+a$b b a$b a$b
+^ & abc @abc
+$ & abc @
+^$ & "" @
+$^ - "" @
+\($\)\(^\) b "" @
+# stop retching, those are legitimate (although disgusting)
+^^ - "" @
+$$ - "" @
+b$ & abNc
+b$ &n abNc b
+^b$ & aNbNc
+^b$ &n aNbNc b
+^$ &n aNNb @Nb
+^$ n abc
+^$ n abcN @
+$^ n aNNb @Nb
+\($\)\(^\) bn aNNb @Nb
+^^ n^ aNNb @Nb
+$$ n aNNb @NN
+^a ^ a
+a$ $ a
+^a ^n aNb
+^b ^n aNb b
+a$ $n bNa
+b$ $n bNa b
+a*(^b$)c* - b b
+a*\(^b$\)c* b b b
+
+# certain syntax errors and non-errors
+| C EMPTY
+| b | |
+* C BADRPT
+* b * *
++ C BADRPT
+? C BADRPT
+"" &C EMPTY
+() - abc @abc
+\(\) b abc @abc
+a||b C EMPTY
+|ab C EMPTY
+ab| C EMPTY
+(|a)b C EMPTY
+(a|)b C EMPTY
+(*a) C BADRPT
+(+a) C BADRPT
+(?a) C BADRPT
+({1}a) C BADRPT
+\(\{1\}a\) bC BADRPT
+(a|*b) C BADRPT
+(a|+b) C BADRPT
+(a|?b) C BADRPT
+(a|{1}b) C BADRPT
+^* C BADRPT
+^* b * *
+^+ C BADRPT
+^? C BADRPT
+^{1} C BADRPT
+^\{1\} bC BADRPT
+
+# metacharacters, backslashes
+a.c & abc abc
+a[bc]d & abd abd
+a\*c & a*c a*c
+a\\b & a\b a\b
+a\\\*b & a\*b a\*b
+# The following test is wrong. Using \b in a BRE or ERE is undefined.
+# a\bc & abc abc
+a\ &C EESCAPE
+a\\bc & a\bc a\bc
+\{ bC BADRPT
+a\[b & a[b a[b
+a[b &C EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$ & a a
+a$ & a$
+a\$ & a
+a\$ & a$ a$
+a\\$ & a
+a\\$ & a$
+a\\$ & a\$
+a\\$ & a\ a\
+
+# back references, ugh
+a\(b\)\2c bC ESUBREG
+a\(b\1\)c bC ESUBREG
+a\(b*\)c\1d b abbcbbd abbcbbd bb
+a\(b*\)c\1d b abbcbd
+a\(b*\)c\1d b abbcbbbd
+^\(.\)\1 b abc
+a\([bc]\)\1d b abcdabbd abbd b
+a\(\([bc]\)\2\)*d b abbccd abbccd
+a\(\([bc]\)\2\)*d b abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d b abbbd abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1 b ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd b aabcd aabcd
+\(a\)\1bc*d b aabcd aabcd
+\(a\)\1bc*d b aabd aabd
+\(a\)\1bc*d b aabcccd aabcccd
+\(a\)\1bc*[ce]d b aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
+
+# ordinary repetitions
+ab*c & abc abc
+ab+c - abc abc
+ab?c - abc abc
+a\(*\)b b a*b a*b
+a\(**\)b b ab ab
+a\(***\)b bC BADRPT
+*a b *a *a
+**a b a a
+***a bC BADRPT
+
+# the dreaded bounded repetitions
+# The following two tests are not correct:
+#{ & { {
+#{abc & {abc {abc
+# '{' is always a special char outside bracket expressions. So test only BRE:
+{ b { {
+{abc b {abc {abc
+{1 C BADRPT
+{1} C BADRPT
+# Same reason as for the two tests above:
+#a{b & a{b a{b
+a{b b a{b a{b
+a{1}b - ab ab
+a\{1\}b b ab ab
+a{1,}b - ab ab
+a\{1,\}b b ab ab
+a{1,2}b - aab aab
+a\{1,2\}b b aab aab
+a{1 C EBRACE
+a\{1 bC EBRACE
+a{1a C EBRACE
+a\{1a bC EBRACE
+a{1a} C BADBR
+a\{1a\} bC BADBR
+# These four tests check for undefined behavior. Our implementation does
+# something different.
+#a{,2} - a{,2} a{,2}
+#a\{,2\} bC BADBR
+#a{,} - a{,} a{,}
+#a\{,\} bC BADBR
+a{1,x} C BADBR
+a\{1,x\} bC BADBR
+a{1,x C EBRACE
+a\{1,x bC EBRACE
+# These two tests probably fail due to an arbitrary limit on the number of
+# repetitions in the other implementation.
+#a{300} C BADBR
+#a\{300\} bC BADBR
+a{1,0} C BADBR
+a\{1,0\} bC BADBR
+ab{0,0}c - abcac ac
+ab\{0,0\}c b abcac ac
+ab{0,1}c - abcac abc
+ab\{0,1\}c b abcac abc
+ab{0,3}c - abbcac abbc
+ab\{0,3\}c b abbcac abbc
+ab{1,1}c - acabc abc
+ab\{1,1\}c b acabc abc
+ab{1,3}c - acabc abc
+ab\{1,3\}c b acabc abc
+ab{2,2}c - abcabbc abbc
+ab\{2,2\}c b abcabbc abbc
+ab{2,4}c - abcabbc abbc
+ab\{2,4\}c b abcabbc abbc
+((a{1,10}){1,10}){1,10} - a a a,a
+
+# multiple repetitions
+# Wow, there is serious disconnect here. The ERE grammar is like this:
+# ERE_expression : one_char_or_coll_elem_ERE
+# | '^'
+# | '$'
+# | '(' extended_reg_exp ')'
+# | ERE_expression ERE_dupl_symbol
+# ;
+# where ERE_dupl_symbol is any of the repetition methods. It is clear from
+# this that consecutive repetition is OK. On top of this, the one test not
+# marked as failing must fail. For BREs the situation is different, so we
+# use the four tests.
+#a** &C BADRPT
+a** bC BADRPT
+#a++ C BADRPT
+#a?? C BADRPT
+#a*+ C BADRPT
+#a*? C BADRPT
+#a+* C BADRPT
+#a+? C BADRPT
+#a?* C BADRPT
+#a?+ C BADRPT
+#a{1}{1} C BADRPT
+#a*{1} C BADRPT
+#a+{1} C BADRPT
+#a?{1} C BADRPT
+#a{1}* C BADRPT
+#a{1}+ C BADRPT
+#a{1}? C BADRPT
+#a*{b} - a{b} a{b}
+a\{1\}\{1\} bC BADRPT
+a*\{1\} bC BADRPT
+a\{1\}* bC BADRPT
+
+# brackets, and numerous perversions thereof
+a[b]c & abc abc
+a[ab]c & abc abc
+a[^ab]c & adc adc
+a[]b]c & a]c a]c
+a[[b]c & a[c a[c
+a[-b]c & a-c a-c
+a[^]b]c & adc adc
+a[^-b]c & adc adc
+a[b-]c & a-c a-c
+a[b &C EBRACK
+a[] &C EBRACK
+a[1-3]c & a2c a2c
+a[3-1]c &C ERANGE
+a[1-3-5]c &C ERANGE
+a[[.-.]--]c & a-c a-c
+# I don't think the error value should be ERANGE since a[1-] would be
+# valid, too. Expect EBRACK.
+#a[1- &C ERANGE
+a[1- &C EBRACK
+a[[. &C EBRACK
+a[[.x &C EBRACK
+a[[.x. &C EBRACK
+a[[.x.] &C EBRACK
+a[[.x.]] & ax ax
+a[[.x,.]] &C ECOLLATE
+# This test is invalid. "one" is not a collating symbol in any standardized
+# locale.
+# a[[.one.]]b & a1b a1b
+a[[.notdef.]]b &C ECOLLATE
+a[[.].]]b & a]b a]b
+a[[:alpha:]]c & abc abc
+a[[:notdef:]]c &C ECTYPE
+a[[: &C EBRACK
+a[[:alpha &C EBRACK
+a[[:alpha:] &C EBRACK
+a[[:alpha,:] &C ECTYPE
+a[[:]:]]b &C ECTYPE
+a[[:-:]]b &C ECTYPE
+a[[:alph:]] &C ECTYPE
+a[[:alphabet:]] &C ECTYPE
+[[:alnum:]]+ - -%@a0X- a0X
+[[:alpha:]]+ - -%@aX0- aX
+[[:blank:]]+ - aSSTb SST
+[[:cntrl:]]+ - aNTb NT
+[[:digit:]]+ - a019b 019
+[[:graph:]]+ - Sa%bS a%b
+[[:lower:]]+ - AabC ab
+[[:print:]]+ - NaSbN aSb
+[[:punct:]]+ - S%-&T %-&
+[[:space:]]+ - aSNTb SNT
+[[:upper:]]+ - aBCd BC
+[[:xdigit:]]+ - p0f3Cq 0f3C
+a[[=b=]]c & abc abc
+a[[= &C EBRACK
+a[[=b &C EBRACK
+a[[=b= &C EBRACK
+a[[=b=] &C EBRACK
+a[[=b,=]] &C ECOLLATE
+# This test is invalid. "one" is not a collating symbol in any standardized
+# locale.
+#a[[=one=]]b & a1b a1b
+
+# complexities
+a(((b)))c - abc abc
+a(b|(c))d - abd abd
+a(b*|c)d - abbd abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
+1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
+12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
+123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
+[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
+
+# subtleties of matching
+abc & xabcy abc
+a\(b\)?c\1d b acd
+aBc i Abc Abc
+a[Bc]*d i abBCcd abBCcd
+0[[:upper:]]1 &i 0a1 0a1
+0[[:lower:]]1 &i 0A1 0A1
+a[^b]c &i abc
+a[^b]c &i aBc
+a[^b]c &i adc adc
+[a]b[c] - abc abc
+[a]b[a] - aba aba
+[abc]b[abc] - abc abc
+[abc]b[abd] - abd abd
+a(b?c)+d - accd accd
+(wee|week)(knights|night) - weeknights weeknights
+(we|wee|week|frob)(knights|night|day) - weeknights weeknights
+a[bc]d - xyzaaabcaababdacd abd
+a[ab]c - aaabc abc
+abc s abc abc
+a* & b @b
+
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/ - /*x*/ /*x*/
+# but...
+/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
+/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
+/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
+
+# subexpressions
+.* - abc abc -
+a(b)(c)d - abcd abcd b,c
+a(((b)))c - abc abc b,b,b
+a(b|(c))d - abd abd b,-
+a(b*|c|e)d - abbd abbd bb
+a(b*|c|e)d - acd acd c
+a(b*|c|e)d - ad ad @d
+a(b?)c - abc abc b
+a(b?)c - ac ac @c
+a(b+)c - abc abc b
+a(b+)c - abbbc abbbc bbb
+a(b*)c - ac ac @c
+(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
+# the regression tester only asks for 9 subexpressions
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
+a([bc]?)c - abc abc b
+a([bc]?)c - ac ac @c
+a([bc]+)c - abc abc b
+a([bc]+)c - abcc abcc bc
+a([bc]+)bc - abcbc abcbc bc
+a(bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abbb abbb bb
+a(bbb+|bb+|b)bb - abbb abbb b
+(.*).* - abcdef abcdef abcdef
+(a*)* - bc @b @b
+
+# do we get the right subexpression when it is used more than once?
+a(b|c)*d - ad ad -
+a(b|c)*d - abcd abcd c
+a(b|c)+d - abd abd b
+a(b|c)+d - abcd abcd c
+a(b|c?)+d - ad ad @d
+a(b|c?)+d - abcd abcd c
+a(b|c){0,0}d - ad ad -
+a(b|c){0,1}d - ad ad -
+a(b|c){0,1}d - abd abd b
+a(b|c){0,2}d - ad ad -
+a(b|c){0,2}d - abcd abcd c
+a(b|c){0,}d - ad ad -
+a(b|c){0,}d - abcd abcd c
+a(b|c){1,1}d - abd abd b
+a(b|c){1,1}d - acd acd c
+a(b|c){1,2}d - abd abd b
+a(b|c){1,2}d - abcd abcd c
+a(b|c){1,}d - abd abd b
+a(b|c){1,}d - abcd abcd c
+a(b|c){2,2}d - acbd acbd b
+a(b|c){2,2}d - abcd abcd c
+a(b|c){2,4}d - abcd abcd c
+a(b|c){2,4}d - abcbd abcbd b
+a(b|c){2,4}d - abcbcd abcbcd c
+a(b|c){2,}d - abcd abcd c
+a(b|c){2,}d - abcbd abcbd b
+a(b+|((c)*))+d - abd abd b,-,-
+a(b+|((c)*))+d - abcd abcd c,c,c
+
+# check out the STARTEND option
+[abc] &# a(b)c b
+[abc] &# a(d)c
+[abc] &# a(bc)d b
+[abc] &# a(dc)d c
+. &# a()c
+b.*c &# b(bc)c bc
+b.* &# b(bc)c bc
+.*c &# b(bc)c bc
+
+# plain strings, with the NOSPEC flag
+abc m abc abc
+abc m xabcy abc
+abc m xyz
+a*b m aba*b a*b
+a*b m ab
+"" mC EMPTY
+
+# cases involving NULs
+aZb & a a
+aZb &p a
+aZb &p# (aZb) aZb
+aZ*b &p# (ab) ab
+a.b &# (aZb) aZb
+a.* &# (aZb)c aZb
+
+# word boundaries (ick)
+[[:<:]]a & a a
+[[:<:]]a & ba
+[[:<:]]a & -a a
+a[[:>:]] & a a
+a[[:>:]] & ab
+a[[:>:]] & a- a
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
+[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
+[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
+[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
+[[:<:]]a_b[[:>:]] & x_a_b
+
+# past problems, and suspected problems
+(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
+abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
+abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
+(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
+CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
+Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
+a?b - ab ab
+-\{0,1\}[0-9]*$ b -5 -5
+a*a*a*a*a*a*a* & aaaaaa aaaaaa
diff --git a/testsuite/allsub.good b/testsuite/allsub.good
new file mode 100644
index 0000000..234e159
--- /dev/null
+++ b/testsuite/allsub.good
@@ -0,0 +1 @@
+bar bar fo oo f oo bar bar bar bar bar bar bar bar bar bar bar bar bar
diff --git a/testsuite/allsub.inp b/testsuite/allsub.inp
new file mode 100644
index 0000000..f75655f
--- /dev/null
+++ b/testsuite/allsub.inp
@@ -0,0 +1 @@
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
diff --git a/testsuite/allsub.sed b/testsuite/allsub.sed
new file mode 100644
index 0000000..8aa29c1
--- /dev/null
+++ b/testsuite/allsub.sed
@@ -0,0 +1 @@
+s/foo/bar/g
diff --git a/testsuite/binary.good b/testsuite/binary.good
new file mode 100644
index 0000000..788024d
--- /dev/null
+++ b/testsuite/binary.good
@@ -0,0 +1,8 @@
+192
+168
+1
+0
+192
+168
+1
+255
diff --git a/testsuite/binary.inp b/testsuite/binary.inp
new file mode 100644
index 0000000..06bf77c
--- /dev/null
+++ b/testsuite/binary.inp
@@ -0,0 +1,4 @@
+192.168.1.2 br b8<r b16<r b24< R|R|R| D
+255.255.255.0 br b8<r b16<r b24< R|R|R| D~r
+& DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+| DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
diff --git a/testsuite/binary.sed b/testsuite/binary.sed
new file mode 100644
index 0000000..92a6a06
--- /dev/null
+++ b/testsuite/binary.sed
@@ -0,0 +1,189 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (of one binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version does everything in pattern space (a la dc.sed).
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+
+1s/^/%%/
+
+:cmd
+s/\(.*%%\) *\([0-9][0-9]*\)/\2\
+\1/
+tcmd
+s/%% *#.*/%%/
+/%%$/ {
+ $b quit
+ N
+}
+
+/^.*%%D/ s/^[^\n]*\n/&&/
+/^.*%%P/ s/^[^\n]*\n//
+/^.*%%x/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+/^.*%%r/ s/^\([^\n]*\n\)\([^%]*\)/\2\1/
+/^.*%%R/ s/^\([^%]*\n\)\([^\n]*\n\)/\2\1/
+/^.*%%c/ s/^.*%%/%%/
+/^.*%%p/ P
+
+/^.*%%l/ {
+ h
+ s/.%%.*//
+ p
+ g
+}
+
+/^.*%%q/ {
+ :quit
+ /^%%/!P
+ d
+}
+
+/^.*%%b/ {
+ # Decimal to binary via analog form
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/^.*%%d/ {
+ # Binary to decimal via analog form
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/^.*%%&/ {
+ # Binary AND
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/^.*%%^/ {
+ # Binary XOR
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/^.*%%|/ {
+ # Binary OR
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/^.*%%~/ {
+ # Binary NOT
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/^.*%%</ {
+ # Left shift, convert to analog and add a binary digit for each analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/^.*%%>/ {
+ # Right shift, convert to analog and remove a binary digit for each analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+
+s/%%./%%/
+tcmd
diff --git a/testsuite/binary2.sed b/testsuite/binary2.sed
new file mode 100644
index 0000000..daf7706
--- /dev/null
+++ b/testsuite/binary2.sed
@@ -0,0 +1,226 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (of one binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version keeps the stack in hold space and the command in pattern
+# space; it is the fastest one (though the gap with binary3.sed is small).
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+:cmd
+s/^[\n\t ]*//
+s/^#.*//
+/^$/ {
+ $b quit
+ N
+ t cmd
+}
+/^[0-9][0-9]*/ {
+ G
+ h
+ s/^[0-9][0-9]* *\([^\n]*\).*/\1/
+ x
+ s/^\([0-9][0-9]*\)[^\n]*/\1/
+ x
+ t cmd
+}
+
+/^[^DPxrRcplqbd&|^~<>]/b bad
+
+/^D/ {
+ x
+ s/^[^\n]*\n/&&/
+}
+/^P/ {
+ x
+ s/^[^\n]*\n//
+}
+/^x/ {
+ x
+ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+}
+/^r/ {
+ x
+ s/^\([^\n]*\n\)\(.*\)/\2\1/
+}
+/^R/ {
+ x
+ s/^\(.*\n\)\([^\n]*\n\)/\2\1/
+}
+/^c/ {
+ x
+ s/.*//
+}
+/^p/ {
+ x
+ P
+}
+
+/^l/ {
+ x
+ p
+}
+
+/^q/ {
+ :quit
+ x
+ /./P
+ d
+}
+
+/^b/ {
+ # Decimal to binary via analog form
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/^d/ {
+ # Binary to decimal via analog form
+ x
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/^&/ {
+ # Binary AND
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/^\^/ {
+ # Binary XOR
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/^|/ {
+ # Binary OR
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/^~/ {
+ # Binary NOT
+ x
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/^</ {
+ # Left shift, convert to analog and add a binary digit for each analog digit
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/^>/ {
+ # Right shift, convert to analog and remove a binary digit for each analog digit
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+x
+:bad
+s/^.//
+tcmd
diff --git a/testsuite/binary3.sed b/testsuite/binary3.sed
new file mode 100644
index 0000000..b877f14
--- /dev/null
+++ b/testsuite/binary3.sed
@@ -0,0 +1,204 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (of one binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version keeps the stack and the current command in hold space and
+# the commands in pattern space; it is just a bit slower than binary2.sed
+# but is more size-optimized, for broken seds which have a 199-command limit
+# (though binary2.sed does not have that many commands).
+#
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+:cmd
+s/^[\n\t ]*//
+s/^#.*//
+/^$/ {
+ $b quit
+ N
+ t cmd
+}
+/^[0-9][0-9]*/ {
+ G
+ h
+ s/^[0-9][0-9]* *\([^\n]*\).*/\1/
+ x
+ s/^\([0-9][0-9]*\)[^\n]*/\1/
+ x
+ t cmd
+}
+
+/^[^DPxrRcplqbd&|^~<>]/bbad
+
+H
+x
+s/\(\n[^\n]\)[^\n]*$/\1/
+
+/D$/ s/^[^\n]*\n/&&/
+/P$/ s/^[^\n]*\n//
+/x$/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+/r$/ s/^\([^\n]*\n\)\(.*\)\(..\)/\2\1\3/
+/R$/ s/^\(.*\n\)\([^\n]*\n\)\(..\)/\2\1\3/
+/c$/ s/.*//
+/p$/ P
+# l: print the whole stack, top element first.
+#
+# Here the pattern space is <stack>\n<command letter>, where every stack
+# element is followed by a newline, and the hold space holds the command
+# line (which still starts with the "l" being executed).  The handler must
+# leave that layout intact and fall through to the common epilogue at the
+# end of the script; branching straight back to :cmd would make the stack
+# be parsed as if it were the command line.
+/l$/ {
+ # Drop the three-character tail "\n\nl" (newline ending the last
+ # element, separator newline, command letter) so that neither a blank
+ # line nor the command letter is printed.
+ s/...$//
+ # With an empty stack the pattern space is only "\nl", the s above
+ # cannot match and the text still ends in "l": nothing to print and
+ # nothing to restore.
+ /l$/!{
+  p
+  # Rebuild the tail.  G appends a newline plus the command line; keep
+  # that newline twice and, of the command line, only its leading "l".
+  G
+  s/\(\n\)l[^\n]*$/\1\1l/
+ }
+}
+
+# q: quit, printing the stack top if there is one.  The :quit label is also
+# the target of "$b quit" at the top of the script, taken when the command
+# text is exhausted on the last input line.
+/q$/ {
+ :quit
+ # Arriving from "$b quit": the pattern space is empty and the stack is
+ # still in hold space, so fetch it.  Every element ends in a newline,
+ # hence any character at all means there is a top element for P.
+ /^$/ {
+  x
+  /./P
+  d
+ }
+ # Arriving from the q command: the pattern space is <stack>\nq.  A
+ # non-empty stack makes it at least four characters long ("1\n\nq"),
+ # an empty one leaves just "\nq"; hence the three-character test.
+ /.../P
+ d
+}
+
+/b$/ {
+ # Decimal to binary via analog form
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/d$/ {
+ # Binary to decimal via analog form
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/&$/ {
+ # Binary AND
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/\^$/ {
+ # Binary XOR
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/|$/ {
+ # Binary OR
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/~$/ {
+ # Binary NOT
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/<$/ {
+ # Left shift, convert to analog and add a binary digit for each analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/>$/ {
+ # Right shift, convert to analog and remove a binary digit for each analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+s/..$//
+x
+:bad
+s/^.//
+tcmd
diff --git a/testsuite/bkslashes.good b/testsuite/bkslashes.good
new file mode 100644
index 0000000..770d1e6
--- /dev/null
+++ b/testsuite/bkslashes.good
@@ -0,0 +1,2 @@
+a\
+
diff --git a/testsuite/bkslashes.inp b/testsuite/bkslashes.inp
new file mode 100644
index 0000000..7898192
--- /dev/null
+++ b/testsuite/bkslashes.inp
@@ -0,0 +1 @@
+a
diff --git a/testsuite/bkslashes.sed b/testsuite/bkslashes.sed
new file mode 100644
index 0000000..aa8c66c
--- /dev/null
+++ b/testsuite/bkslashes.sed
@@ -0,0 +1,3 @@
+# bug in sed 4.0b
+s/$/\\\
+/
diff --git a/testsuite/bsd.good b/testsuite/bsd.good
new file mode 100644
index 0000000..0e21b0f
--- /dev/null
+++ b/testsuite/bsd.good
@@ -0,0 +1,1737 @@
+============
+Test 1.1:101
+============
+Testing argument parsing
+First type
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.2:102
+============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+============
+Test 1.3:103
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.4:104
+============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+Second type
+
+==============
+Test 1.4.1:105
+==============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 1.5:106
+============
+s1_l1_1
+s1_l1_1
+s1_l1_2
+s1_l1_2
+s1_l1_3
+s1_l1_3
+s1_l1_4
+s1_l1_4
+s1_l1_5
+s1_l1_5
+s1_l1_6
+s1_l1_6
+s1_l1_7
+s1_l1_7
+s1_l1_8
+s1_l1_8
+s1_l1_9
+s1_l1_9
+s1_l1_10
+s1_l1_10
+s1_l1_11
+s1_l1_11
+s1_l1_12
+s1_l1_12
+s1_l1_13
+s1_l1_13
+s1_l1_14
+s1_l1_14
+
+============
+Test 1.6:107
+============
+s1_l1_1
+s1_l1_1
+s1_l1_2
+s1_l1_2
+s1_l1_3
+s1_l1_3
+s1_l1_4
+s1_l1_4
+s1_l1_5
+s1_l1_5
+s1_l1_6
+s1_l1_6
+s1_l1_7
+s1_l1_7
+s1_l1_8
+s1_l1_8
+s1_l1_9
+s1_l1_9
+s1_l1_10
+s1_l1_10
+s1_l1_11
+s1_l1_11
+s1_l1_12
+s1_l1_12
+s1_l1_13
+s1_l1_13
+s1_l1_14
+s1_l1_14
+
+============
+Test 1.7:108
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.8:109
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.9:110
+============
+s1_l1_1
+s1_l1_2
+s1_l1_3
+s1_l1_4
+s1_l1_5
+s1_l1_6
+s1_l1_7
+s1_l1_8
+s1_l1_9
+s1_l1_10
+s1_l1_11
+s1_l1_12
+s1_l1_13
+s1_l1_14
+
+=============
+Test 1.10:111
+=============
+s1_l1_1
+s1_l1_2
+s1_l1_3
+s1_l1_4
+s1_l1_5
+s1_l1_6
+s1_l1_7
+s1_l1_8
+s1_l1_9
+s1_l1_10
+s1_l1_11
+s1_l1_12
+s1_l1_13
+s1_l1_14
+
+=============
+Test 1.11:112
+=============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+=============
+Test 1.12:113
+=============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+=============
+Test 1.13:114
+=============
+e1_l1_1
+e2_e1_l1_1
+e2_e1_l1_1
+e1_l1_2
+e2_e1_l1_2
+e2_e1_l1_2
+e1_l1_3
+e2_e1_l1_3
+e2_e1_l1_3
+e1_l1_4
+e2_e1_l1_4
+e2_e1_l1_4
+e1_l1_5
+e2_e1_l1_5
+e2_e1_l1_5
+e1_l1_6
+e2_e1_l1_6
+e2_e1_l1_6
+e1_l1_7
+e2_e1_l1_7
+e2_e1_l1_7
+e1_l1_8
+e2_e1_l1_8
+e2_e1_l1_8
+e1_l1_9
+e2_e1_l1_9
+e2_e1_l1_9
+e1_l1_10
+e2_e1_l1_10
+e2_e1_l1_10
+e1_l1_11
+e2_e1_l1_11
+e2_e1_l1_11
+e1_l1_12
+e2_e1_l1_12
+e2_e1_l1_12
+e1_l1_13
+e2_e1_l1_13
+e2_e1_l1_13
+e1_l1_14
+e2_e1_l1_14
+e2_e1_l1_14
+
+=============
+Test 1.14:115
+=============
+s1_l1_1
+s2_s1_l1_1
+s2_s1_l1_1
+s1_l1_2
+s2_s1_l1_2
+s2_s1_l1_2
+s1_l1_3
+s2_s1_l1_3
+s2_s1_l1_3
+s1_l1_4
+s2_s1_l1_4
+s2_s1_l1_4
+s1_l1_5
+s2_s1_l1_5
+s2_s1_l1_5
+s1_l1_6
+s2_s1_l1_6
+s2_s1_l1_6
+s1_l1_7
+s2_s1_l1_7
+s2_s1_l1_7
+s1_l1_8
+s2_s1_l1_8
+s2_s1_l1_8
+s1_l1_9
+s2_s1_l1_9
+s2_s1_l1_9
+s1_l1_10
+s2_s1_l1_10
+s2_s1_l1_10
+s1_l1_11
+s2_s1_l1_11
+s2_s1_l1_11
+s1_l1_12
+s2_s1_l1_12
+s2_s1_l1_12
+s1_l1_13
+s2_s1_l1_13
+s2_s1_l1_13
+s1_l1_14
+s2_s1_l1_14
+s2_s1_l1_14
+
+=============
+Test 1.15:116
+=============
+e1_l1_1
+s1_e1_l1_1
+s1_e1_l1_1
+e1_l1_2
+s1_e1_l1_2
+s1_e1_l1_2
+e1_l1_3
+s1_e1_l1_3
+s1_e1_l1_3
+e1_l1_4
+s1_e1_l1_4
+s1_e1_l1_4
+e1_l1_5
+s1_e1_l1_5
+s1_e1_l1_5
+e1_l1_6
+s1_e1_l1_6
+s1_e1_l1_6
+e1_l1_7
+s1_e1_l1_7
+s1_e1_l1_7
+e1_l1_8
+s1_e1_l1_8
+s1_e1_l1_8
+e1_l1_9
+s1_e1_l1_9
+s1_e1_l1_9
+e1_l1_10
+s1_e1_l1_10
+s1_e1_l1_10
+e1_l1_11
+s1_e1_l1_11
+s1_e1_l1_11
+e1_l1_12
+s1_e1_l1_12
+s1_e1_l1_12
+e1_l1_13
+s1_e1_l1_13
+s1_e1_l1_13
+e1_l1_14
+s1_e1_l1_14
+s1_e1_l1_14
+
+=============
+Test 1.16:117
+=============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+=============
+Test 1.17:118
+=============
+l1_1
+l1_1
+l1_2
+l1_2
+l1_3
+l1_3
+l1_4
+l1_4
+l1_5
+l1_5
+l1_6
+l1_6
+l1_7
+l1_7
+l1_8
+l1_8
+l1_9
+l1_9
+l1_10
+l1_10
+l1_11
+l1_11
+l1_12
+l1_12
+l1_13
+l1_13
+l1_14
+l1_14
+
+=============
+Test 1.18:119
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+Testing address ranges
+
+============
+Test 2.1:120
+============
+l1_4
+
+============
+Test 2.2:121
+============
+l2_6
+
+============
+Test 2.3:122
+============
+l1_14
+
+============
+Test 2.4:123
+============
+l2_9
+
+============
+Test 2.5:124
+============
+
+============
+Test 2.6:125
+============
+l2_9
+
+============
+Test 2.7:126
+============
+
+============
+Test 2.9:127
+============
+l1_7
+
+=============
+Test 2.10:128
+=============
+l1_7
+
+=============
+Test 2.11:129
+=============
+l1_7
+
+=============
+Test 2.12:130
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+
+=============
+Test 2.13:131
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.14:132
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.15:133
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.16:134
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+
+=============
+Test 2.17:135
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.18:136
+=============
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.19:137
+=============
+l1_12
+
+=============
+Test 2.20:138
+=============
+l1_7
+Brace and other grouping
+
+============
+Test 3.1:139
+============
+l1_1
+l1_2
+l1_3
+^l1T4$
+^l1T5$
+^l1T6$
+^l1T7$
+^l1T8$
+^l1T9$
+^l1T10$
+^l1T11$
+^l1T12$
+l1_13
+l1_14
+
+============
+Test 3.2:140
+============
+l1_1
+l1_2
+l1_3
+^l1_4
+^l1_5
+^l1_6$
+^l1_7$
+^l1T8$
+^l1_9$
+^l1_10$
+^l1_11
+^l1_12
+l1_13
+l1_14
+
+============
+Test 3.3:141
+============
+^l1T1$
+^l1T2$
+^l1T3$
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+^l1T13$
+^l1T14$
+
+============
+Test 3.4:142
+============
+^l1_1
+^l1_2
+^l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+^l1_13
+^l1_14
+Testing a c d and i commands
+
+============
+Test 4.1:143
+============
+before_il1_1
+after_ibefore_il1_1
+before_il1_2
+after_ibefore_il1_2
+before_il1_3
+after_ibefore_il1_3
+before_il1_4
+after_ibefore_il1_4
+before_il1_5
+after_ibefore_il1_5
+before_il1_6
+after_ibefore_il1_6
+before_il1_7
+after_ibefore_il1_7
+before_il1_8
+after_ibefore_il1_8
+before_il1_9
+after_ibefore_il1_9
+before_il1_10
+after_ibefore_il1_10
+before_il1_11
+after_ibefore_il1_11
+before_il1_12
+after_ibefore_il1_12
+before_il1_13
+after_ibefore_il1_13
+before_il1_14
+after_ibefore_il1_14
+before_il2_1
+after_ibefore_il2_1
+before_il2_2
+after_ibefore_il2_2
+before_il2_3
+after_ibefore_il2_3
+before_il2_4
+after_ibefore_il2_4
+before_il2_5
+after_ibefore_il2_5
+before_il2_6
+inserted
+after_ibefore_il2_6
+before_il2_7
+after_ibefore_il2_7
+before_il2_8
+after_ibefore_il2_8
+before_il2_9
+after_ibefore_il2_9
+
+============
+Test 4.2:144
+============
+before_al1_1
+after_abefore_al1_1
+before_al1_2
+after_abefore_al1_2
+before_al1_3
+after_abefore_al1_3
+before_al1_4
+after_abefore_al1_4
+before_a5-12l1_5
+after_abefore_a5-12l1_5
+appended
+before_a5-12l1_6
+after_abefore_a5-12l1_6
+appended
+before_a5-12l1_7
+after_abefore_a5-12l1_7
+appended
+before_a5-12l1_8
+after_abefore_a5-12l1_8
+appended
+before_a5-12l1_9
+after_abefore_a5-12l1_9
+appended
+before_a5-12l1_10
+after_abefore_a5-12l1_10
+appended
+before_a5-12l1_11
+after_abefore_a5-12l1_11
+appended
+before_a5-12l1_12
+after_abefore_a5-12l1_12
+appended
+before_al1_13
+after_abefore_al1_13
+before_al1_14
+after_abefore_al1_14
+before_al2_1
+after_abefore_al2_1
+before_al2_2
+after_abefore_al2_2
+before_al2_3
+after_abefore_al2_3
+before_al2_4
+after_abefore_al2_4
+before_al2_5
+after_abefore_al2_5
+before_al2_6
+after_abefore_al2_6
+before_al2_7
+after_abefore_al2_7
+before_al2_8
+after_abefore_al2_8
+before_al2_9
+after_abefore_al2_9
+
+============
+Test 4.3:145
+============
+^l1_1
+^l1_1$
+appended
+^l1_2
+^l1_2$
+appended
+^l1_3
+^l1_3$
+appended
+^l1_4
+^l1_4$
+appended
+^l1_5
+^l1_5$
+appended
+^l1_6
+^l1_6$
+appended
+^l1_7
+^l1_7$
+appended
+^l1_8
+appended
+^l1_8
+l1_9$
+^l1_10
+appended
+^l1_10
+l1_11$
+^l1_12
+^l1_12$
+appended
+^l1_13
+^l1_13$
+appended
+^l1_14
+^l1_14$
+appended
+^l2_1
+^l2_1$
+^l2_2
+^l2_2$
+^l2_3
+^l2_3$
+^l2_4
+^l2_4$
+^l2_5
+^l2_5$
+^l2_6
+^l2_6$
+^l2_7
+^l2_7$
+^l2_8
+^l2_8$
+^l2_9
+^l2_9$
+
+============
+Test 4.4:146
+============
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+
+============
+Test 4.5:147
+============
+hello
+
+============
+Test 4.6:148
+============
+hello
+
+============
+Test 4.7:149
+============
+hello
+
+============
+Test 4.8:150
+============
+Testing labels and branching
+
+============
+Test 5.1:151
+============
+label2_l1_1
+label3_label2_l1_1
+label1_l1_2
+label1_l1_3
+label1_l1_4
+label1_l1_5
+label1_l1_6
+label1_l1_7
+label1_l1_8
+label1_l1_9
+label1_l1_10
+label1_l1_11
+label1_l1_12
+label2_l1_13
+label3_label2_l1_13
+label2_l1_14
+label3_label2_l1_14
+
+============
+Test 5.2:152
+============
+tested l2_1
+tested l2_2
+tested l2_3
+tested l2_4
+tested l2_5
+tested l2_6
+tested l2_7
+tested l2_8
+tested l2_9
+tested l2_10
+tested l2_11
+tested l2_12
+tested l2_13
+tested l2_14
+
+============
+Test 5.3:153
+============
+^l1_1
+^l1_1$
+^l1_2
+^l1_2$
+^l1_3
+^l1_3$
+^l1_4
+^l1_4$
+l1_5$
+l1_6$
+l1_7$
+l1_8$
+
+============
+Test 5.4:154
+============
+^l1_1$
+^l1_2$
+^l1_3$
+^l1_4$
+^l1_5$
+^l1_6$
+^l1_7$
+^l1_8$
+l1_9$
+l1_10$
+l1_11$
+l1_12$
+l1_13$
+l1_14$
+
+============
+Test 5.5:155
+============
+^l1_1
+^l1_2
+^l1_4
+^l1_6
+^l1_8
+
+============
+Test 5.6:156
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+
+============
+Test 5.7:157
+============
+l1_1
+l1_2
+l1_3
+l1_4
+hello
+l1_5
+
+============
+Test 5.8:158
+============
+m1_1
+m1_2
+m1_3
+m1_4
+m1_5
+m1_6
+m1_7
+m1_8
+m1_9
+m1_10
+m1_11
+m1_12
+m1_13
+m1_14
+Pattern space commands
+
+============
+Test 6.1:159
+============
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+
+============
+Test 6.2:160
+============
+l1_1
+l1_2
+l1_3
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.3:161
+============
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.4:162
+============
+l1_1
+l1_2
+l1_3
+l1_2
+l1_3
+l1_5
+l1_2
+l1_3
+l1_2
+l1_3
+l1_6
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.5:163
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.6:164
+============
+Testing print and file routines
+
+============
+Test 7.1:165
+============
+\001\002\003\004\005\006\a\b\t$
+\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\
+\035\036\037 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX\
+YZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\
+\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\
+\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\
+\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\
+\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\
+\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\
+\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\
+\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\
+\375\376\377$
+$
+
+============
+Test 7.2:166
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+15
+l2_1
+16
+l2_2
+17
+l2_3
+18
+l2_4
+19
+l2_5
+20
+l2_6
+21
+l2_7
+22
+l2_8
+23
+l2_9
+
+============
+Test 7.3:167
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+w results
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+
+============
+Test 7.4:168
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.5:169
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.6:170
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.8:171
+============
+
+Testing substitution commands
+
+============
+Test 8.1:172
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.2:173
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.3:174
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.4:175
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 8.5:176
+============
+l1X1
+l1X2
+l1X3
+l1X4
+l1X5
+l1X6
+l1X7
+l1X8
+l1X9
+l1X10
+l1X11
+l1X12
+l1X13
+l1X14
+
+============
+Test 8.6:177
+============
+(l)(1)(_)(1)
+(l)(1)(_)(2)
+(l)(1)(_)(3)
+(l)(1)(_)(4)
+(l)(1)(_)(5)
+(l)(1)(_)(6)
+(l)(1)(_)(7)
+(l)(1)(_)(8)
+(l)(1)(_)(9)
+(l)(1)(_)(1)(0)
+(l)(1)(_)(1)(1)
+(l)(1)(_)(1)(2)
+(l)(1)(_)(1)(3)
+(l)(1)(_)(1)(4)
+
+============
+Test 8.7:178
+============
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+
+============
+Test 8.8:179
+============
+x_x1xl1
+x_x1xl2
+x_x1xl3
+x_x1xl4
+x_x1xl5
+x_x1xl6
+x_x1xl7
+x_x1xl8
+x_x1xl9
+x_x1xl10
+x_x1xl11
+x_x1xl12
+x_x1xl13
+x_x1xl14
+
+============
+Test 8.9:180
+============
+l1u0
+u1
+u21
+l1u0
+u1
+u22
+l1u0
+u1
+u23
+l1u0
+u1
+u24
+l1u0
+u1
+u25
+l1u0
+u1
+u26
+l1u0
+u1
+u27
+l1u0
+u1
+u28
+l1u0
+u1
+u29
+l1u0
+u1
+u210
+l1u0
+u1
+u211
+l1u0
+u1
+u212
+l1u0
+u1
+u213
+l1u0
+u1
+u214
+
+=============
+Test 8.10:181
+=============
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X0
+l1_X1
+l1_X2
+l1_X3
+l1_X4
+
+=============
+Test 8.11:182
+=============
+lX_1
+lX_2
+lX_3
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_10
+lX_11
+lX_12
+lX_13
+lX_14
+s wfile results
+lX_1
+lX_2
+lX_3
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_10
+lX_11
+lX_12
+lX_13
+lX_14
+
+=============
+Test 8.12:183
+=============
+lX_X
+lX_X
+lX_X
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_X0
+lX_XX
+lX_XX
+lX_XX
+lX_X4
+
+=============
+Test 8.13:184
+=============
+l8_8
+l8_7
+l8_6
+l8_5
+l8_4
+l8_3
+l8_2
+l8_1
+l8_0
+l8_89
+l8_88
+l8_87
+l8_86
+l8_85
+
+=============
+Test 8.14:185
+=============
+l8_8
+l8_7
+l8_6
+l8_5
+l8_4
+l8_3
+l8_2
+l8_1
+l8_0
+l8_89
+l8_88
+l8_87
+l8_86
+l8_85
+
+=============
+Test 8.15:186
+=============
+l1_1Xl1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+=============
+Test 8.16:187
+=============
+eeefff
+Xeefff
+XYefff
+XYeYff
+XYeYYf
+XYeYYY
+XYeYYY
diff --git a/testsuite/bsd.sh b/testsuite/bsd.sh
new file mode 100755
index 0000000..fecb2f4
--- /dev/null
+++ b/testsuite/bsd.sh
@@ -0,0 +1,434 @@
+#!/bin/sh -
+# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $
+#
+# Copyright (c) 1992 Diomidis Spinellis.
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# from: @(#)sed.test 8.1 (Berkeley) 6/6/93
+# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $
+#
+
+# sed Regression Tests
+
+# Modified by Paolo Bonzini to:
+# - not warn about buggy seds
+# - run tests once instead of comparing them to the system sed
+# - remove most uses of awk
+# - cleanup at exit
+# - comment tests that broke because of extensions
+
+main()
+{
+ TEST="${1-../sed/sed}"
+ TESTLOG="${2-sed.out}"
+ # DICT="${3-/usr/share/dict/words}"
+
+ # Fixtures: lines1 gets l1_1 .. l1_14, lines2 gets l2_1 .. l2_9.
+ : > lines1
+ : > lines2
+ for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do
+ echo l1_$i >> lines1
+ case $i in
+ ?) echo l2_$i >> lines2 ;;
+ esac
+ done
+
+ tests "$TEST" "$TESTLOG"
+
+ # Drop the fixtures and the scratch files written by the test functions.
+ rm -f lines[1234] script[12]
+}
+
+tests()
+{ # Run every test group against sed binary "$1", logging to file "$2".
+ SED="$1"
+ LOG="$2"
+ MARK=100 # mark() increments before printing, so the first banner reads 101
+ rm -f "$LOG"
+
+ exec 3>&0 4>&1 5>&2 # keep copies of stdin/stdout/stderr on fds 3/4/5
+ exec 0</dev/null 1>/dev/null 2>/dev/null # the error tests run silently
+ test_error
+ exec 0>&3 1>&4 2>&5 # put the original stdin/stdout/stderr back
+
+ exec 4>&1 5>&2
+ test_args # from here on each mark() call points stdout at the log
+ test_addr
+ test_group
+ test_acid
+ test_branch
+ test_pattern
+ test_print
+ test_subst
+ exec 1>&4 2>&5 # undo the redirections left behind by mark()
+}
+
+mark()
+{ # Begin test "$1": route output to the log and write a three-line banner.
+ exec 2>&1 >>"$LOG" # stderr takes over the old stdout; stdout appends to the log
+ test $MARK = 100 || echo # blank separator before every banner but the first
+ MARK=`expr $MARK + 1`
+ echo "Test $1:$MARK" | sed 's/./=/g' # rule of "=" as wide as the title
+ echo "Test $1:$MARK"
+ echo "Test $1:$MARK" | sed 's/./=/g'
+}
+
+test_args()
+{ # Option parsing: -n, -e and -f, with input from file operands and stdin.
+ mark '1.1'
+ echo Testing argument parsing
+ echo First type
+ $SED 's/^/e1_/p' lines1
+ mark '1.2' ; $SED -n 's/^/e1_/p' lines1
+ mark '1.3' ; $SED 's/^/e1_/p' <lines1
+ mark '1.4' ; $SED -n 's/^/e1_/p' <lines1
+ echo Second type
+ mark '1.4.1'
+ $SED -e '' <lines1 # empty script
+ echo 's/^/s1_/p' >script1
+ echo 's/^/s2_/p' >script2
+ mark '1.5' ; $SED -f script1 lines1
+ mark '1.6' ; $SED -f script1 <lines1
+ mark '1.7' ; $SED -e 's/^/e1_/p' lines1
+ mark '1.8' ; $SED -e 's/^/e1_/p' <lines1
+ mark '1.9' ; $SED -n -f script1 lines1
+ mark '1.10' ; $SED -n -f script1 <lines1
+ mark '1.11' ; $SED -n -e 's/^/e1_/p' lines1
+ mark '1.12' ; $SED -n -e 's/^/e1_/p' <lines1
+ mark '1.13' ; $SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1
+ mark '1.14' ; $SED -f script1 -f script2 lines1
+ mark '1.15' ; $SED -e 's/^/e1_/p' -f script1 lines1
+ mark '1.16' ; $SED -e 's/^/e1_/p' lines1 lines1
+ # POSIX D11.2:11251
+ mark '1.17' ; $SED p <lines1 lines1 # both a stdin redirect and a file operand
+cat >script1 <<EOF
+#n
+# A comment
+
+p
+EOF
+ mark '1.18' ; $SED -f script1 <lines1 lines1 # script1 now begins with "#n"
+}
+
+test_addr()
+{ # Line-number, "$" and regex addresses, alone and as ranges.
+ echo Testing address ranges
+ mark '2.1' ; $SED -n -e '4p' lines1
+ mark '2.2' ; $SED -n -e '20p' lines1 lines2 # line 20 of 14 + 9 input lines
+ mark '2.3' ; $SED -n -e '$p' lines1
+ mark '2.4' ; $SED -n -e '$p' lines1 lines2
+ mark '2.5' ; $SED -n -e '$a\
+hello' /dev/null
+ mark '2.6' ; $SED -n -e '$p' lines1 /dev/null lines2
+ # Should not print anything
+ mark '2.7' ; $SED -n -e '20p' lines1
+ # Disabled because it is undefined behavior
+ # mark '2.8' ; $SED -n -e '0p' lines1
+ mark '2.9' ; $SED -n '/l1_7/p' lines1
+ mark '2.10' ; $SED -n ' /l1_7/ p' lines1 # blanks around the address
+ mark '2.11' ; $SED -n '\_l1\_7_p' lines1 # "_" as the regex delimiter
+ mark '2.12' ; $SED -n '1,4p' lines1
+ mark '2.13' ; $SED -n '1,$p' lines1 lines2
+ mark '2.14' ; $SED -n '1,/l2_9/p' lines1 lines2
+ mark '2.15' ; $SED -n '/4/,$p' lines1 lines2
+ mark '2.16' ; $SED -n '/4/,20p' lines1 lines2
+ mark '2.17' ; $SED -n '/4/,/10/p' lines1 lines2
+ mark '2.18' ; $SED -n '/l2_3/,/l1_8/p' lines1 lines2
+ mark '2.19' ; $SED -n '12,3p' lines1 lines2 # second address below the first
+ mark '2.20' ; $SED -n '/l1_7/,3p' lines1 lines2
+}
+
+test_group()
+{ # "{ ... }" command groups: plain, nested, and negated with "!".
+ echo Brace and other grouping
+ mark '3.1' ; $SED -e '
+4,12 {
+ s/^/^/
+ s/$/$/
+ s/_/T/
+}' lines1 # 3.1: one group applied to lines 4-12
+ mark '3.2' ; $SED -e '
+4,12 {
+ s/^/^/
+ /6/,/10/ {
+ s/$/$/
+ /8/ s/_/T/
+ }
+}' lines1 # 3.2: a regex-range group nested inside the 4,12 group
+ mark '3.3' ; $SED -e '
+4,12 !{
+ s/^/^/
+ /6/,/10/ !{
+ s/$/$/
+ /8/ !s/_/T/
+ }
+}' lines1 # 3.3: the 3.2 script with every address negated
+ mark '3.4' ; $SED -e '4,12!s/^/^/' lines1
+}
+
+test_acid()
+{ # The a (append), c (change), d (delete) and i (insert) commands.
+ echo Testing a c d and i commands
+ mark '4.1' ; $SED -n -e '
+s/^/before_i/p
+20i\
+inserted
+s/^/after_i/p
+' lines1 lines2 # 4.1: insert before line 20
+ mark '4.2' ; $SED -n -e '
+5,12s/^/5-12/
+s/^/before_a/p
+/5-12/a\
+appended
+s/^/after_a/p
+' lines1 lines2 # 4.2: append after the lines tagged "5-12"
+ mark '4.3' ; $SED -n -e '
+s/^/^/p
+/l1_/a\
+appended
+8,10N
+s/$/$/p
+' lines1 lines2 # 4.3: append combined with N
+ mark '4.4' ; $SED -n -e '
+c\
+hello
+' lines1 # 4.4: c with no address
+ mark '4.5' ; $SED -n -e '
+8c\
+hello
+' lines1 # 4.5: c on a single line
+ mark '4.6' ; $SED -n -e '
+3,14c\
+hello
+' lines1 # 4.6: c on a range
+ mark '4.7' ; $SED -n -e '
+8,3c\
+hello
+' lines1 # 4.7: c on a range whose second address is below the first
+ mark '4.8' ; $SED d <lines1
+}
+
+test_branch()
+{ # Labels with the b and t branch commands, plus q.
+ echo Testing labels and branching
+ mark '5.1' ; $SED -n -e '
+b label4
+:label3
+s/^/label3_/p
+b end
+:label4
+2,12b label1
+b label2
+:label1
+s/^/label1_/p
+b
+:label2
+s/^/label2_/p
+b label3
+:end
+' lines1 # 5.1: forward and backward b between several labels
+ mark '5.2' ; $SED -n -e '
+s/l1_/l2_/
+t ok
+b
+:ok
+s/^/tested /p
+' lines1 lines2 # 5.2: t taken only where the s command substituted
+ mark '5.3' ; $SED -n -e '
+5,8b inside
+1,5 {
+ s/^/^/p
+ :inside
+ s/$/$/p
+}
+' lines1 # 5.3: branch to a label placed inside a group
+# Check that t clears the substitution done flag
+ mark '5.4' ; $SED -n -e '
+1,8s/^/^/
+t l1
+:l1
+t l2
+s/$/$/p
+b
+:l2
+s/^/ERROR/
+' lines1
+# Check that reading a line clears the substitution done flag
+ mark '5.5' ; $SED -n -e '
+t l2
+1,8s/^/^/p
+2,7N
+b
+:l2
+s/^/ERROR/p
+' lines1
+ mark '5.6' ; $SED 5q lines1
+ mark '5.7' ; $SED -e '
+5i\
+hello
+5q' lines1 # 5.7: i followed by q on the same line
+# Branch across block boundary
+ mark '5.8' ; $SED -e '
+{
+:b
+}
+s/l/m/
+tb' lines1
+}
+
+test_pattern()
+{ # Pattern-space and hold-space commands: c d D g G h H n N P x.
+echo Pattern space commands
+# Check that the pattern space is deleted
+ mark '6.1' ; $SED -n -e '
+c\
+changed
+p
+' lines1
+ mark '6.2' ; $SED -n -e '
+4d
+p
+' lines1 # 6.2: p follows a d on line 4
+ mark '6.3' ; $SED -e '
+N
+N
+N
+D
+P
+4p
+' lines1 # 6.3: N repeated three times, then D and P
+ mark '6.4' ; $SED -e '
+2h
+3H
+4g
+5G
+6x
+6p
+6x
+6p
+' lines1 # 6.4: hold-space traffic with h H g G x
+ mark '6.5' ; $SED -e '4n' lines1
+ mark '6.6' ; $SED -n -e '4n' lines1
+}
+
+test_print()
+{ # The l and = commands and the r/w file commands.
+ echo Testing print and file routines
+ awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \
+ </dev/null >lines3
+ mark '7.1' ; $SED -n l lines3 # lines3 holds character codes 1..255 from awk
+ mark '7.2' ; $SED -e '/l2_/=' lines1 lines2
+ rm -f lines4
+ mark '7.3' ; $SED -e '3,12w lines4' lines1
+ echo w results
+ cat lines4 # show what the w command wrote
+ mark '7.4' ; $SED -e '4r lines2' lines1
+ mark '7.5' ; $SED -e '5r /dev/dds' lines1 # NOTE(review): /dev/dds presumably does not exist - confirm
+ mark '7.6' ; $SED -e '6r /dev/null' lines1
+ # mark '7.7'
+ # sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1
+ # rm -rf tmpdir
+ # mkdir tmpdir
+ # $SED -f script1 lines1
+ # cat tmpdir/*
+ # rm -rf tmpdir
+ mark '7.8'
+ echo line1 > lines3
+ echo "" >> lines3 # lines3 is reused: "line1" followed by an empty line
+ $SED -n -e '$p' lines3 /dev/null
+}
+
+test_subst()
+{ # The s and y commands: delimiters, "&", back-references, flags, w files.
+ echo Testing substitution commands
+ mark '8.1' ; $SED -e 's/./X/g' lines1
+ mark '8.2' ; $SED -e 's,.,X,g' lines1 # "," as the delimiter
+ mark '8.3' ; $SED -e 's.\..X.g' lines1 # "." as the delimiter, escaped in the regex
+# POSIX does not say that this should work
+# mark '8.4' ; $SED -e 's/[/]/Q/' lines1
+ mark '8.4' ; $SED -e 's/[\/]/Q/' lines1
+ mark '8.5' ; $SED -e 's_\__X_' lines1 # "_" as the delimiter, escaped in the regex
+ mark '8.6' ; $SED -e 's/./(&)/g' lines1
+ mark '8.7' ; $SED -e 's/./(\&)/g' lines1 # escaped "&" in the replacement
+ mark '8.8' ; $SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1
+ mark '8.9' ; $SED -e 's/_/u0\
+u1\
+u2/g' lines1
+ mark '8.10' ; $SED -e 's/./X/4' lines1 # numeric flag 4
+ rm -f lines4
+ mark '8.11' ; $SED -e 's/1/X/w lines4' lines1
+ echo s wfile results
+ cat lines4 # show what the w flag wrote
+ mark '8.12' ; $SED -e 's/[123]/X/g' lines1
+ mark '8.13' ; $SED -e 'y/0123456789/9876543210/' lines1
+ mark '8.14' ; $SED -e 'y10\123456789198765432\101' lines1 # 8.13 again with "1" as the delimiter
+ mark '8.15' ; $SED -e '1N;2y/\n/X/' lines1
+ mark '8.16'
+ echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \
+ -e 's//Y/p' -e '/f/bx'
+}
+
+test_error()
+{ # Each invocation must fail; one that succeeds makes the whole script exit 1.
+ $SED -x && exit 1 # -x is passed as an option
+ $SED -f && exit 1 # -f without its argument
+ $SED -e && exit 1 # -e without its argument
+ $SED -f /dev/dds && exit 1
+ $SED p /dev/dds && exit 1
+ $SED -f /bin/sh && exit 1
+ $SED '{' && exit 1 # group opened but never closed
+ $SED '}' && exit 1 # closing brace with no open group
+ $SED '/hello/' && exit 1 # address without a command
+ $SED '1,/hello/' && exit 1
+ $SED -e '-5p' && exit 1
+ $SED '/jj' && exit 1 # regex address missing its closing delimiter
+ # $SED 'a hello' && exit 1
+ # $SED 'a \ hello' && exit 1
+ $SED 'b foo' && exit 1 # the script defines no label "foo"
+ $SED 'd hello' && exit 1 # text after d
+ $SED 's/aa' && exit 1
+ $SED 's/aa/' && exit 1
+ $SED 's/a/b' && exit 1
+ $SED 's/a/b/c/d' && exit 1
+ $SED 's/a/b/ 1 2' && exit 1
+ # $SED 's/a/b/ 1 g' && exit 1
+ $SED 's/a/b/w' && exit 1 # w flag without a file name
+ $SED 'y/aa' && exit 1
+ $SED 'y/aa/b/' && exit 1 # source and destination differ in length
+ $SED 'y/aa/' && exit 1
+ $SED 'y/a/b' && exit 1
+ $SED 'y/a/b/c/d' && exit 1
+ $SED '!' && exit 1 # "!" with no command after it
+ $SED supercalifrangolisticexprialidociussupercalifrangolisticexcius
+}
+
+main ${1+"$@"} # forward the script arguments to main only when at least one was given
diff --git a/testsuite/bug-regex10.c b/testsuite/bug-regex10.c
new file mode 100644
index 0000000..5cb0146
--- /dev/null
+++ b/testsuite/bug-regex10.c
@@ -0,0 +1,61 @@
+/* Test for re_match with non-zero start.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <regex.h>
+
+int
+main (void)
+{
+ struct re_pattern_buffer regex;
+ struct re_registers regs;
+ const char *s;
+ int match, result = 0;
+ /* re_match from offset 2 must return 5, with register 0 spanning 2..7. */
+ regs.num_regs = 1;
+ memset (&regex, '\0', sizeof (regex));
+ s = re_compile_pattern ("[abc]*d", 7, &regex);
+ if (s != NULL)
+ {
+ puts ("re_compile_pattern return non-NULL value");
+ result = 1;
+ }
+ else
+ {
+ match = re_match (&regex, "foacabdxy", 9, 2, &regs);
+ if (match != 5)
+ {
+ printf ("re_match returned %d, expected 5\n", match);
+ result = 1;
+ }
+ else if (regs.start[0] != 2 || regs.end[0] != 7)
+ {
+ printf ("re_match returned %d..%d, expected 2..7\n",
+ regs.start[0], regs.end[0]);
+ result = 1;
+ }
+ else
+ puts (" -> OK"); /* announce success only when both checks passed */
+ }
+
+ return result; /* 0 on success, 1 on any failure */
+}
diff --git a/testsuite/bug-regex11.c b/testsuite/bug-regex11.c
new file mode 100644
index 0000000..29fa7de
--- /dev/null
+++ b/testsuite/bug-regex11.c
@@ -0,0 +1,135 @@
+/* Regular expression tests.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Tests supposed to match. */
+struct
+{
+ const char *pattern; /* regex source handed to regcomp */
+ const char *string; /* subject handed to regexec */
+ int flags, nmatch; /* regcomp cflags; number of rm[] entries compared */
+ regmatch_t rm[5]; /* expected offsets; a { -1, -1 } entry ends the comparison */
+} tests[] = {
+ /* Test for newline handling in regex. */
+ { "[^~]*~", "\nx~y", 0, 2, { { 0, 3 }, { -1, -1 } } },
+ /* Other tests. */
+ { "a(.*)b", "a b", REG_EXTENDED, 2, { { 0, 3 }, { 1, 2 } } },
+ { ".*|\\([KIO]\\)\\([^|]*\\).*|?[KIO]", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
+ { { 0, 21 }, { 15, 16 }, { 16, 18 } } },
+ { ".*|\\([KIO]\\)\\([^|]*\\).*|?\\1", "10~.~|P|K0|I10|O16|?KSb", 0, 3,
+ { { 0, 21 }, { 8, 9 }, { 9, 10 } } },
+ { "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)",
+ "a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0", 0,
+ 5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } },
+ /* Test for BRE expression anchoring. POSIX says just that this may match;
+ in glibc regex it always matched, so avoid changing it. */
+ { "\\(^\\|foo\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
+ { "\\(foo\\|^\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } },
+ /* In ERE this must be treated as an anchor. */
+ { "(^|foo)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
+ { "(foo|^)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } },
+ /* Here ^ cannot be treated as an anchor according to POSIX. */
+ { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
+ { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
+ /* More tests on backreferences. */
+ { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
+ { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } },
+ { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
+ { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
+ { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
+ { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
+ { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
+ { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
+ { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
+ { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
+ { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
+ { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
+ { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
+ { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
+ { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
+ { "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$",
+ "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
+ { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
+ "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
+ { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
+ "abcdedcba", REG_EXTENDED, 1, { { 0, 9 } } },
+#if 0
+ /* XXX Not used since they fail so far. */
+ { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$|^.?$",
+ "ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
+ { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
+ "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
+ { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
+ "ababababa", REG_EXTENDED, 1, { { 0, 9 } } },
+#endif
+};
+
+int
+main (void)
+{
+ regex_t re;
+ regmatch_t rm[5];
+ size_t i; /* table index; printed with %zu because it is a size_t */
+ int n, ret = 0;
+
+ mtrace ();
+ /* Every table entry must compile, match, and report the listed offsets. */
+ for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
+ {
+ n = regcomp (&re, tests[i].pattern, tests[i].flags);
+ if (n != 0)
+ {
+ char buf[500];
+ regerror (n, &re, buf, sizeof (buf));
+ printf ("%s: regcomp %zu failed: %s\n", tests[i].pattern, i, buf);
+ ret = 1;
+ continue;
+ }
+
+ if (regexec (&re, tests[i].string, tests[i].nmatch, rm, 0))
+ {
+ printf ("%s: regexec %zu failed\n", tests[i].pattern, i);
+ ret = 1;
+ regfree (&re);
+ continue;
+ }
+
+ for (n = 0; n < tests[i].nmatch; ++n)
+ if (rm[n].rm_so != tests[i].rm[n].rm_so
+ || rm[n].rm_eo != tests[i].rm[n].rm_eo)
+ {
+ if (tests[i].rm[n].rm_so == -1 && tests[i].rm[n].rm_eo == -1)
+ break; /* expected { -1, -1 }: stop comparing without failing */
+ printf ("%s: regexec %zu match failure rm[%d] %d..%d\n",
+ tests[i].pattern, i, n, rm[n].rm_so, rm[n].rm_eo);
+ ret = 1;
+ break;
+ }
+
+ regfree (&re);
+ }
+
+ return ret; /* 0 when every entry passed, 1 otherwise */
+}
diff --git a/testsuite/bug-regex12.c b/testsuite/bug-regex12.c
new file mode 100644
index 0000000..0ad063e
--- /dev/null
+++ b/testsuite/bug-regex12.c
@@ -0,0 +1,73 @@
+/* Regular expression tests.
+ Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Tests supposed to not match. */
+struct
+{
+ const char *pattern; /* regex source handed to regcomp */
+ const char *string; /* subject that regexec must NOT match */
+ int flags, nmatch; /* regcomp cflags; nmatch argument for regexec */
+} tests[] = {
+ { "^<\\([^~]*\\)\\([^~]\\)[^~]*~\\1\\(.\\).*|=.*\\3.*\\2",
+ "<,.8~2,~so-|=-~.0,123456789<><", REG_NOSUB, 0 },
+ /* In ERE, all carets must be treated as anchors. */
+ { "a^b", "a^b", REG_EXTENDED, 0 }
+};
+
+int
+main (void)
+{
+ regex_t re;
+ regmatch_t rm[4];
+ size_t i; /* table index; printed with %zu because it is a size_t */
+ int n, ret = 0;
+
+ mtrace ();
+ /* Every table entry must compile but must fail to match its string. */
+ for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
+ {
+ n = regcomp (&re, tests[i].pattern, tests[i].flags);
+ if (n != 0)
+ {
+ char buf[500];
+ regerror (n, &re, buf, sizeof (buf));
+ printf ("regcomp %zu failed: %s\n", i, buf);
+ ret = 1;
+ continue;
+ }
+
+ if (! regexec (&re, tests[i].string, tests[i].nmatch,
+ tests[i].nmatch ? rm : NULL, 0))
+ {
+ printf ("regexec %zu incorrectly matched\n", i);
+ ret = 1;
+ }
+
+ regfree (&re);
+ }
+
+ return ret; /* 0 when every entry passed, 1 otherwise */
+}
diff --git a/testsuite/bug-regex13.c b/testsuite/bug-regex13.c
new file mode 100644
index 0000000..df1c95d
--- /dev/null
+++ b/testsuite/bug-regex13.c
@@ -0,0 +1,103 @@
+/* Regular expression tests.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static struct
+{
+ int syntax; /* value handed to re_set_syntax before compiling */
+ const char *pattern; /* regex source handed to re_compile_pattern */
+ const char *string; /* subject handed to re_search */
+ int start; /* expected re_search result; -1 where no match is expected */
+} tests[] = {
+ {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match. */
+ {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match. */
+ {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abbc", -1},
+ /* Nested duplication. */
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "ac", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abc", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "ac", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbc", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abbc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "ac", 0},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abc", -1},
+ {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abbc", -1},
+};
+
+int
+main (void)
+{
+ struct re_pattern_buffer compiled;
+ const size_t ntests = sizeof (tests) / sizeof (tests[0]);
+ size_t idx;
+ int status = 0;
+
+ mtrace ();
+
+ /* Compile each pattern under its own syntax and check where it matches. */
+ for (idx = 0; idx < ntests; ++idx)
+ {
+ const char *msg;
+ int len = strlen (tests[idx].string);
+ int found;
+ re_set_syntax (tests[idx].syntax);
+ memset (&compiled, '\0', sizeof (compiled));
+ msg = re_compile_pattern (tests[idx].pattern,
+ strlen (tests[idx].pattern), &compiled);
+ if (msg != NULL)
+ {
+ printf ("re_compile_pattern failed: %s\n", msg);
+ status = 1;
+ continue;
+ }
+
+ found = re_search (&compiled, tests[idx].string, len, 0, len, NULL);
+ if (found != tests[idx].start)
+ {
+ printf ("re_search failed %d\n", found);
+ status = 1;
+ }
+ /* The compiled buffer is released whether or not the check passed. */
+ regfree (&compiled);
+ }
+ return status;
+}
diff --git a/testsuite/bug-regex14.c b/testsuite/bug-regex14.c
new file mode 100644
index 0000000..91ff32a
--- /dev/null
+++ b/testsuite/bug-regex14.c
@@ -0,0 +1,54 @@
+/* Tests re_comp and re_exec.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define _REGEX_RE_COMP
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+ int remaining = 100;
+ int failed = 0;
+
+ mtrace ();
+
+ /* Recompile and rerun the same pattern 100 times over. */
+ while (remaining-- > 0)
+ {
+ const char *msg = re_comp ("a t.st");
+
+ if (msg != NULL)
+ {
+ printf ("re_comp failed: %s\n", msg);
+ failed = 1;
+ }
+ if (re_exec ("This is a test.") == 0)
+ {
+ printf ("re_exec failed\n");
+ failed = 1;
+ }
+ }
+
+ return failed;
+}
diff --git a/testsuite/bug-regex15.c b/testsuite/bug-regex15.c
new file mode 100644
index 0000000..10526b1
--- /dev/null
+++ b/testsuite/bug-regex15.c
@@ -0,0 +1,47 @@
+/* Test for memory/CPU leak in regcomp. */
+
+#include <error.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define TEST_DATA_LIMIT (32 << 20)
+
+int
+main ()
+{
+#ifdef RLIMIT_DATA
+ regex_t re;
+ int reerr;
+
+ /* Try to avoid eating all memory if a test leaks. */
+ struct rlimit data_limit;
+ if (getrlimit (RLIMIT_DATA, &data_limit) == 0)
+ {
+ if ((rlim_t) TEST_DATA_LIMIT > data_limit.rlim_max)
+ data_limit.rlim_cur = data_limit.rlim_max; /* cannot exceed the hard limit */
+ else if (data_limit.rlim_cur > (rlim_t) TEST_DATA_LIMIT)
+ data_limit.rlim_cur = (rlim_t) TEST_DATA_LIMIT; /* only ever lower the soft limit */
+ if (setrlimit (RLIMIT_DATA, &data_limit) < 0)
+ perror ("setrlimit: RLIMIT_DATA");
+ }
+ else
+ perror ("getrlimit: RLIMIT_DATA");
+
+ reerr = regcomp (&re, "^6?3?[25]?5?[14]*[25]*[69]*+[58]*87?4?$",
+ REG_EXTENDED | REG_NOSUB);
+ if (reerr != 0)
+ {
+ char buf[100];
+ regerror (reerr, &re, buf, sizeof buf);
+ printf ("regerror %s\n", buf);
+ return 1;
+ }
+ regfree (&re); /* release the successfully compiled pattern */
+ return 0;
+#else
+ return 77; /* NOTE(review): presumably the test harness "skipped" status - confirm */
+#endif
+}
diff --git a/testsuite/bug-regex16.c b/testsuite/bug-regex16.c
new file mode 100644
index 0000000..1e41ccb
--- /dev/null
+++ b/testsuite/bug-regex16.c
@@ -0,0 +1,35 @@
+/* Test re_compile_pattern error messages. */
+
+#include <stdio.h>
+#include <string.h>
+#include <regex.h>
+
+int
+main (void)
+{
+ struct re_pattern_buffer re;
+ const char *s; /* error text from re_compile_pattern; NULL means it compiled */
+ int ret = 0;
+
+ re_set_syntax (RE_SYNTAX_POSIX_EGREP);
+ memset (&re, 0, sizeof (re));
+ s = re_compile_pattern ("[[.invalid_collating_symbol.]]", 30, &re);
+ if (s == NULL || strcmp (s, "Invalid collation character"))
+ {
+ printf ("re_compile_pattern returned %s\n", s ? s : "(null)");
+ ret = 1;
+ }
+ s = re_compile_pattern ("[[=invalid_equivalence_class=]]", 31, &re);
+ if (s == NULL || strcmp (s, "Invalid collation character"))
+ {
+ printf ("re_compile_pattern returned %s\n", s ? s : "(null)");
+ ret = 1;
+ }
+ s = re_compile_pattern ("[[:invalid_character_class:]]", 29, &re);
+ if (s == NULL || strcmp (s, "Invalid character class name"))
+ {
+ printf ("re_compile_pattern returned %s\n", s ? s : "(null)");
+ ret = 1;
+ }
+ return ret; /* 0 when all three messages matched, 1 otherwise */
+}
diff --git a/testsuite/bug-regex21.c b/testsuite/bug-regex21.c
new file mode 100644
index 0000000..d67c4fe
--- /dev/null
+++ b/testsuite/bug-regex21.c
@@ -0,0 +1,45 @@
+/* Test for memory leaks in regcomp.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+
+int main (void)
+{
+ /* Unbalanced "(": compiling this pattern is expected to fail. */
+ static const char broken[] = "X-.+:.+Y=\".*\\.(A|B|C|D|E|F|G|H|I";
+ regex_t compiled;
+ int round, status = 0;
+
+ mtrace ();
+ for (round = 0; round < 32; ++round)
+ {
+ int rc = regcomp (&compiled, broken, REG_EXTENDED | REG_ICASE);
+ if (rc != 0)
+ regfree (&compiled);
+ else
+ {
+ puts ("regcomp unexpectedly succeeded");
+ status = 1;
+ }
+ }
+ return status;
+}
diff --git a/testsuite/bug-regex7.c b/testsuite/bug-regex7.c
new file mode 100644
index 0000000..4459459
--- /dev/null
+++ b/testsuite/bug-regex7.c
@@ -0,0 +1,92 @@
+/* Test for regs allocation in re_search and re_match.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Stepan Kasal <kasal@math.cas.cz>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+
+
+int
+main (void)
+{
+ struct re_pattern_buffer regex;
+ struct re_registers regs;
+ const char *s;
+ int match, n; /* n: index of the register that must hold the -1 sentinel */
+ int result = 0;
+
+ memset (&regex, '\0', sizeof (regex));
+ regs.start = regs.end = NULL;
+ regs.num_regs = 0;
+ s = re_compile_pattern ("a", 1, &regex);
+ if (s != NULL)
+ {
+ puts ("failed to compile pattern \"a\"");
+ result = 1;
+ }
+ else
+ {
+ match = re_search (&regex, "baobab", 6, 0, 6, &regs);
+ n = 1;
+ if (match != 1)
+ {
+ printf ("re_search returned %d, expected 1\n", match);
+ result = 1;
+ }
+ else if (regs.num_regs <= n || regs.start[n] != -1 || regs.end[n] != -1)
+ {
+ puts ("re_search failed to fill the -1 sentinel");
+ result = 1;
+ }
+ }
+
+ regfree (&regex); /* releases the whole compiled pattern, not only regex.buffer */
+ memset (&regex, '\0', sizeof (regex));
+
+ s = re_compile_pattern ("\\(\\(\\(a\\)\\)\\)", 13, &regex);
+ if (s != NULL)
+ {
+ puts ("failed to compile pattern /\\(\\(\\(a\\)\\)\\)/");
+ result = 1;
+ }
+ else
+ {
+ match = re_match (&regex, "apl", 3, 0, &regs);
+ n = 4;
+ if (match != 1)
+ {
+ printf ("re_match returned %d, expected 1\n", match);
+ result = 1;
+ }
+ else if (regs.num_regs <= n || regs.start[n] != -1 || regs.end[n] != -1)
+ {
+ puts ("re_match failed to fill the -1 sentinel");
+ result = 1;
+ }
+ }
+ regfree (&regex); /* release the second pattern as well */
+ if (result == 0)
+ puts (" -> OK");
+
+ return result; /* 0 on success, 1 on any failure */
+}
diff --git a/testsuite/bug-regex8.c b/testsuite/bug-regex8.c
new file mode 100644
index 0000000..8383e01
--- /dev/null
+++ b/testsuite/bug-regex8.c
@@ -0,0 +1,84 @@
+/* Test for the STOP parameter of re_match_2 and re_search_2.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Stepan Kasal <kasal@math.cas.cz>, 2002.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <regex.h>
+
+/* Check that "$", "\>" and "\<" evaluated at the STOP offset look at the text that really follows STOP. */
+int
+main (void)
+{
+ struct re_pattern_buffer regex;
+ const char *s;
+ int match[4];
+
+ memset (&regex, '\0', sizeof (regex));
+
+ s = re_compile_pattern ("xy$", 3, &regex);
+ if (s != NULL)
+ {
+ puts ("failed to compile pattern \"xy$\"");
+ return 1;
+ }
+ else
+ match[0] = re_match_2(&regex,"xyz",3,NULL,0,0,NULL,2); /* STOP=2 but "z" follows: expect -1. */
+
+ free (regex.buffer);
+ memset (&regex, '\0', sizeof (regex));
+
+ s = re_compile_pattern ("xy\\>", 4, &regex);
+ if (s != NULL)
+ {
+ puts ("failed to compile pattern \"xy\\>\"");
+ return 1;
+ }
+ else
+ match[1] = re_search_2(&regex,"xyz",3,NULL,0,0,2,NULL,2); /* STOP=2 but "z" follows: expect -1. */
+
+ free (regex.buffer);
+ memset (&regex, '\0', sizeof (regex));
+
+ s = re_compile_pattern ("xy \\<", 5, &regex);
+ if (s != NULL)
+ {
+ puts ("failed to compile pattern \"xy \\<\"");
+ return 1;
+ }
+ else
+ {
+ match[2] = re_match_2(&regex,"xy ",4,NULL,0,0,NULL,3); /* NOTE(review): literal shows 3 chars but size is 4 -- whitespace may be collapsed in this rendering; confirm. Expect -1. */
+ match[3] = re_match_2(&regex,"xy z",4,NULL,0,0,NULL,3); /* "z" follows STOP=3: expect 3. */
+ }
+
+ if (match[0] != -1 || match[1] != -1 || match[2] != -1 || match[3] != 3)
+ {
+ printf ("re_{match,search}_2 returned %d,%d,%d,%d, expected -1,-1,-1,3\n",
+ match[0], match[1], match[2], match[3]);
+ return 1;
+ }
+
+ puts (" -> OK");
+
+ return 0;
+}
diff --git a/testsuite/bug-regex9.c b/testsuite/bug-regex9.c
new file mode 100644
index 0000000..5a32668
--- /dev/null
+++ b/testsuite/bug-regex9.c
@@ -0,0 +1,67 @@
+/* Test for memory handling in regex.
+ Copyright (C) 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Subject string for every regexec call below; the asserted offsets refer to its 10 characters. */
+static const char text[] = "#! /bin/sh";
+/* Compile one ERE, run it 20 times with malloc tracing enabled (mtrace), and verify the offsets each time. */
+int
+main (void)
+{
+ regex_t re;
+ regmatch_t rm[2];
+ int n;
+
+ mtrace (); /* Declared in <mcheck.h>. */
+
+ n = regcomp (&re, "^#! */.*/(k|ba||pdk|z)sh", REG_EXTENDED); /* Group 1 has an empty alternative. */
+ if (n != 0)
+ {
+ char buf[500];
+ regerror (n, &re, buf, sizeof (buf));
+ printf ("regcomp failed: %s\n", buf);
+ exit (1);
+ }
+
+ for (n = 0; n < 20; ++n)
+ {
+ if (regexec (&re, text, 2, rm, 0))
+ {
+ puts ("regexec failed");
+ exit (2);
+ }
+ if (rm[0].rm_so != 0 || rm[0].rm_eo != 10
+ || rm[1].rm_so != 8 || rm[1].rm_eo != 8) /* Whole match 0-10; group 1 is empty at offset 8. */
+ {
+ printf ("regexec match failure: %d %d %d %d\n",
+ rm[0].rm_so, rm[0].rm_eo, rm[1].rm_so, rm[1].rm_eo);
+ exit (3);
+ }
+ }
+
+ regfree (&re);
+
+ return 0;
+}
diff --git a/testsuite/classes.good b/testsuite/classes.good
new file mode 100644
index 0000000..58f96f4
--- /dev/null
+++ b/testsuite/classes.good
@@ -0,0 +1,4 @@
+: ${_cv_='emptyvar'}
+: ${ac_cv_prog/RANLIB='/usr/bin/ranlib'}
+: ${ac_cv_prog/CC='/usr/unsupported/\ \ /lib/_cv_/cc'}
+: ${a/c_cv_prog/CPP='/usr/bin/cpp'}
diff --git a/testsuite/classes.inp b/testsuite/classes.inp
new file mode 100644
index 0000000..f1314b6
--- /dev/null
+++ b/testsuite/classes.inp
@@ -0,0 +1,6 @@
+_cv_=emptyvar
+ac_cv_prog/RANLIB=/usr/bin/ranlib
+ac_cv_prog/CC=/usr/unsupported/\ \ /lib/_cv_/cc
+a/c_cv_prog/CPP=/usr/bin/cpp
+SHELL=bash
+GNU=GNU!UNIX
diff --git a/testsuite/classes.sed b/testsuite/classes.sed
new file mode 100644
index 0000000..897651f
--- /dev/null
+++ b/testsuite/classes.sed
@@ -0,0 +1,2 @@
+# Inspired by an autoconf-generated configure script: rewrite NAME=VALUE as ": ${NAME='VALUE'}" and print it when NAME contains _cv_ (uses POSIX classes and a literal / inside brackets).
+s/^\([/[:lower:]A-Z0-9]*_cv_[[:lower:][:upper:]/[:digit:]]*\)=\(.*\)/: \${\1='\2'}/p
diff --git a/testsuite/cv-vars.good b/testsuite/cv-vars.good
new file mode 100644
index 0000000..14707bf
--- /dev/null
+++ b/testsuite/cv-vars.good
@@ -0,0 +1,4 @@
+: ${_cv_='emptyvar'}
+: ${ac_cv_prog_RANLIB='/usr/bin/ranlib'}
+: ${ac_cv_prog_CC='/usr/unsupported/\ \ /lib/_cv_/cc'}
+: ${ac_cv_prog_CPP='/usr/bin/cpp'}
diff --git a/testsuite/cv-vars.inp b/testsuite/cv-vars.inp
new file mode 100644
index 0000000..5e92b1c
--- /dev/null
+++ b/testsuite/cv-vars.inp
@@ -0,0 +1,6 @@
+_cv_=emptyvar
+ac_cv_prog_RANLIB=/usr/bin/ranlib
+ac_cv_prog_CC=/usr/unsupported/\ \ /lib/_cv_/cc
+ac_cv_prog_CPP=/usr/bin/cpp
+SHELL=bash
+GNU=GNU!UNIX
diff --git a/testsuite/cv-vars.sed b/testsuite/cv-vars.sed
new file mode 100644
index 0000000..e49c73e
--- /dev/null
+++ b/testsuite/cv-vars.sed
@@ -0,0 +1,2 @@
+# Inspired by an autoconf-generated configure script: rewrite NAME=VALUE as ": ${NAME='VALUE'}" and print it when NAME (letters, digits, _) contains _cv_.
+s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/: \${\1='\2'}/p
diff --git a/testsuite/dc.good b/testsuite/dc.good
new file mode 100644
index 0000000..bfdc330
--- /dev/null
+++ b/testsuite/dc.good
@@ -0,0 +1,3 @@
+31
+March 2002
+1.6A09E667A
\ No newline at end of file
diff --git a/testsuite/dc.inp b/testsuite/dc.inp
new file mode 100644
index 0000000..e4b731b
--- /dev/null
+++ b/testsuite/dc.inp
@@ -0,0 +1,14 @@
+# Compute Easter of 2002...
+# usage: (echo YEAR; cat easter.dc) | dc
+
+2002
+
+[ddsf[lfp[too early
+]Pq]s@1583>@
+ddd19%1+sg100/1+d3*4/12-sx8*5+25/5-sz5*4/lx-10-sdlg11*20+lz+lx-30%
+d[30+]s@0>@d[[1+]s@lg11<@]s@25=@d[1+]s@24=@se44le-d[30+]s@21>@dld+7%-7+
+[March ]smd[31-[April ]sm]s@31<@psnlmPpsn1z>p]splpx
+
+# Compute square root of 2
+
+16oAk2vpq
\ No newline at end of file
diff --git a/testsuite/dc.sed b/testsuite/dc.sed
new file mode 100644
index 0000000..5267589
--- /dev/null
+++ b/testsuite/dc.sed
@@ -0,0 +1,322 @@
+#!/bin/sed -nf
+# dc.sed - an arbitrary precision RPN calculator
+# Created by Greg Ubben <gsu@romulus.ncsc.mil> early 1995, late 1996
+#
+# Dedicated to MAC's memory of the IBM 1620 ("CADET") computer.
+# @(#)GSU dc.sed 1.1 06-Mar-1999 [non-explanatory]
+#
+# Examples:
+# sqrt(2) to 10 digits: echo "10k 2vp" | dc.sed
+# 20 factorial: echo "[d1-d1<!*]s! 20l!xp" | dc.sed
+# sin(ln(7)): echo "s(l(7))" | bc -c /usr/lib/lib.b | dc.sed
+# hex to base 60: echo "60o16i 6B407.CAFE p" | dc.sed
+# tests most of dc.sed: echo 16oAk2vp | dc.sed
+#
+# To debug or analyze, give the dc Y command as input or add it to
+# embedded dc routines, or add the sed p command to the beginning of
+# the main loop or at various points in the low-level sed routines.
+# If you need to allow [|~] characters in the input, filter this
+# script through "tr '|~' '\36\37'" first (or use dc.pl).
+#
+# Not implemented: ! \
+# But implemented: K Y t # !< !> != fractional-bases
+# SunOS limits: 199/199 commands (though could pack in 10-20 more)
+# Limitations: scale <= 999; |obase| >= 1; input digits in [0..F]
+# Completed: 1am Feb 4, 1997
+# State line: stack items (each ending in ~) come first, then |P print buffer, |K scale, |I input base, |O output base; |? precedes the commands still to run.
+s/^/|P|K0|I10|O10|?~/
+# Main loop: delete the command just handled, then dispatch on the character that now follows |?.
+: next
+s/|?./|?/
+s/|?#[ -}]*/|?/
+/|?!*[lLsS;:<>=]\{0,1\}$/N
+/|?!*[-+*/%^<>=]/b binop
+/^|.*|?[dpPfQXZvxkiosStT;:]/b binop
+/|?[_0-9A-F.]/b number
+/|?\[/b string
+/|?l/b load
+/|?L/b Load
+/|?[sS]/b save
+/|?c/ s/[^|]*//
+/|?d/ s/[^~]*~/&&/
+/|?f/ s//&[pSbz0<aLb]dSaxsaLa/
+/|?x/ s/\([^~]*~\)\(.*|?x\)~*/\2\1/
+/|?[KIO]/ s/.*|\([KIO]\)\([^|]*\).*|?\1/\2~&/
+/|?T/ s/\.*0*~/~/
+# a slow, non-stackable array implementation in dc, just for completeness
+# A fast, stackable, associative array implementation could be done in sed
+# (format: {key}value{key}value...), but would be longer, like load & save.
+/|?;/ s/|?;\([^{}]\)/|?~[s}s{L{s}q]S}[S}l\1L}1-d0>}s\1L\1l{xS\1]dS{xL}/
+/|?:/ s/|?:\([^{}]\)/|?~[s}L{s}L{s}L}s\1q]S}S}S{[L}1-d0>}S}l\1s\1L\1l{xS\1]dS{x/
+/|?[ ~ cdfxKIOT]/b next
+/|?\n/b next
+/|?[pP]/b print
+/|?k/ s/^\([0-9]\{1,3\}\)\([.~].*|K\)[^|]*/\2\1/
+/|?i/ s/^\(-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}\)\(~.*|I\)[^|]*/\2\1/
+/|?o/ s/^\(-\{0,1\}[1-9][0-9]*\.\{0,1\}[0-9]*\)\(~.*|O\)[^|]*/\2\1/
+/|?[kio]/b pop
+/|?t/b trunc
+/|??/b input
+/|?Q/b break
+/|?q/b quit
+h
+/|?[XZz]/b count
+/|?v/b sqrt
+s/.*|?\([^Y]\).*/\1 is unimplemented/
+s/\n/\\n/g
+l
+g
+b next
+# print ("p"/"P"): only "p" on a decimal-digit number with an output base other than 10 is converted here; everything else goes to Print.
+: print
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~.*|?p/!b Print
+/|O10|/b Print
+
+# Print a number in a non-decimal output base. Uses registers a,b,c,d.
+# Handles fractional output bases (O<-1 or O>=1), unlike other dc's.
+# Converts the fraction correctly on negative output bases, unlike
+# UNIX dc. Also scales the fraction more accurately than UNIX dc.
+#
+s,|?p,&KSa0kd[[-]Psa0la-]Sad0>a[0P]sad0=a[A*2+]saOtd0>a1-ZSd[[[[ ]P]sclb1\
+!=cSbLdlbtZ[[[-]P0lb-sb]sclb0>c1+]sclb0!<c[0P1+dld>c]scdld>cscSdLbP]q]Sb\
+[t[1P1-d0<c]scd0<c]ScO_1>bO1!<cO[16]<bOX0<b[[q]sc[dSbdA>c[A]sbdA=c[B]sbd\
+B=c[C]sbdC=c[D]sbdD=c[E]sbdE=c[F]sb]xscLbP]~Sd[dtdZOZ+k1O/Tdsb[.5]*[.1]O\
+X^*dZkdXK-1+ktsc0kdSb-[Lbdlb*lc+tdSbO*-lb0!=aldx]dsaxLbsb]sad1!>a[[.]POX\
++sb1[SbO*dtdldx-LbO*dZlb!<a]dsax]sadXd0<asbsasaLasbLbscLcsdLdsdLdLak[]pP,
+b next
+# Print: "P" appends the top item to the |P buffer; "p" prints buffer plus item as one line and keeps the item. Falls through to pop.
+: Print
+/|?p/s/[^~]*/&\
+~&/
+s/\(.*|P\)\([^|]*\)/\
+\2\1/
+s/\([^~]*\)\n\([^~]*\)\(.*|P\)/\1\3\2/
+h
+s/~.*//
+/./{ s/.//; p; }
+# Just s/.//p would work if we knew we were running under the -n option.
+# Using l vs p would kind of do \ continuations, but would break strings.
+g
+# pop: discard the top stack item (everything up to and including the first ~).
+: pop
+s/[^~]*~//
+b next
+# load ("lX"): push the value held in register X (stored as |rX...), or 0 when no such register entry is found.
+: load
+s/\(.*|?.\)\(.\)/\20~\1/
+s/^\(.\)0\(.*|r\1\([^~|]*\)~\)/\1\3\2/
+s/.//
+b next
+# Load ("LX"): move the first value of register X onto the main stack; prints "register empty" when there is none.
+: Load
+s/\(.*|?.\)\(.\)/\2\1/
+s/^\(.\)\(.*|r\1\)\([^~|]*~\)/|\3\2/
+/^|/!i\
+register empty
+s/.//
+b next
+# save ("sX"/"SX"): move the top item into register X, creating |rX if needed; "S" first opens a new slot, "s" overwrites the first one.
+: save
+s/\(.*|?.\)\(.\)/\2\1/
+/^\(.\).*|r\1/ !s/\(.\).*|/&r\1|/
+/|?S/ s/\(.\).*|r\1/&~/
+s/\(.\)\([^~]*~\)\(.*|r\1\)[^~|]*~\{0,1\}/\3\2/
+b next
+# quit ("q"): drop two ~-terminated chunks of pending commands if present and carry on; otherwise leave only the |P buffer and quit.
+: quit
+t quit
+s/|?[^~]*~[^~]*~/|?q/
+t next
+# Really should be using the -n option to avoid printing a final newline.
+s/.*|P\([^|]*\).*/\1/
+q
+# break ("Q"): count the top number down via the lookup string, removing one ~-terminated chunk after |? per step, then pop it (as read from the regex; not run).
+: break
+s/[0-9]*/&;987654321009;/
+: break1
+s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^;]*\3\(9*\).*|?.\)[^~]*~/\1\5\6\4/
+t break1
+b pop
+# input ("?"): read the next input line (N) and place it ahead of the remaining commands.
+: input
+N
+s/|??\(.*\)\(\n.*\)/|?\2~\1/
+b next
+# count: "Z" length of the top item, "X" its fraction digits, "z" stack depth; the tally of characters is turned from unary into decimal in count1.
+: count
+/|?Z/ s/~.*//
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}$/ s/[-.0]*\([^.]*\)\.*/\1/
+/|?X/ s/-*[0-9A-F]*\.*\([0-9A-F]*\).*/\1/
+s/|.*//
+/~/ s/[^~]//g
+
+s/./a/g
+: count1
+ s/a\{10\}/b/g
+ s/b*a*/&a9876543210;/
+ s/a.\{9\}\(.\).*;/\1/
+ y/b/a/
+/a/b count1
+G
+/|?z/ s/\n/&~/
+s/\n[^~]*//
+b next
+# trunc ("t"): keep at most |K fraction digits of the top number, then normalise it.
+: trunc
+# for efficiency, doesn't pad with 0s, so 10k 2 5/ returns just .40
+# The X* here and in a couple other places works around a SunOS 4.x sed bug.
+s/\([^.~]*\.*\)\(.*|K\([^|]*\)\)/\3;9876543210009909:\1,\2/
+: trunc1
+ s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^:]*X*\3\(9*\)[^,]*\),\([0-9]\)/\1\5\6\4\7,/
+t trunc1
+s/[^:]*:\([^,]*\)[^~]*/\1/
+b normal
+# number: move the literal after |? onto the stack (_ means minus); zero or plain decimal input goes to normal, anything else is converted by generated dc code.
+: number
+s/\(.*|?\)\(_\{0,1\}[0-9A-F]*\.\{0,1\}[0-9A-F]*\)/\2~\1~/
+s/^_/-/
+/^[^A-F~]*~.*|I10|/b normal
+/^[-0.]*~/b normal
+s:\([^.~]*\)\.*\([^~]*\):[Ilb^lbk/,\1\2~0A1B2C3D4E5F1=11223344556677889900;.\2:
+: digit
+ s/^\([^,]*\),\(-*\)\([0-F]\)\([^;]*\(.\)\3[^1;]*\(1*\)\)/I*+\1\2\6\5~,\2\4/
+t digit
+s:...\([^/]*.\)\([^,]*\)[^.]*\(.*|?.\):\2\3KSb[99]k\1]SaSaXSbLalb0<aLakLbktLbk:
+b next
+# string: push the [...] literal after |?; inner bracket pairs are parked as |{ |} while matching, and more input is read (N) when no ] is in sight.
+: string
+/|?[^]]*$/N
+s/\(|?[^]]*\)\[\([^]]*\)]/\1|{\2|}/
+/|?\[/b string
+s/\(.*|?\)|{\(.*\)|}/\2~\1[/
+s/|{/[/g
+s/|}/]/g
+b next
+# binop: needs two stack items ("stack empty" otherwise); non-numeric operands of arithmetic become 0; * / % ^ branch away, + - and comparisons continue below.
+: binop
+/^[^~|]*~[^|]/ !i\
+stack empty
+//!b next
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/[^~]*\(.*|?!*[^!=<>]\)/0\1/
+/^[^~]*~-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/~[^~]*\(.*|?!*[^!=<>]\)/~0\1/
+h
+/|?\*/b mul
+/|?\//b div
+/|?%/b rem
+/|?^/b exp
+
+/|?[+-]/ s/^\(-*\)\([^~]*~\)\(-*\)\([^~]*~\).*|?\(-\{0,1\}\).*/\2\4s\3o\1\3\5/
+s/\([^.~]*\)\([^~]*~[^.~]*\)\(.*\)/<\1,\2,\3|=-~.0,123456789<></
+/^<\([^,]*,[^~]*\)\.*0*~\1\.*0*~/ s/</=/
+: cmp1
+ s/^\(<[^,]*\)\([0-9]\),\([^,]*\)\([0-9]\),/\1,\2\3,\4/
+t cmp1
+/^<\([^~]*\)\([^~]\)[^~]*~\1\(.\).*|=.*\3.*\2/ s/</>/
+/|?/{
+ s/^\([<>]\)\(-[^~]*~-.*\1\)\(.\)/\3\2/
+ s/^\(.\)\(.*|?!*\)\1/\2!\1/
+ s/|?![^!]\(.\)/&l\1x/
+ s/[^~]*~[^~]*~\(.*|?\)!*.\(.*\)|=.*/\1\2/
+ b next
+}
+s/\(-*\)\1|=.*/;9876543210;9876543210/
+/o-/ s/;9876543210/;0123456789/
+s/^>\([^~]*~\)\([^~]*~\)s\(-*\)\(-*o\3\(-*\)\)/>\2\1s\5\4/
+
+s/,\([0-9]*\)\.*\([^,]*\),\([0-9]*\)\.*\([0-9]*\)/\1,\2\3.,\4;0/
+: right1
+ s/,\([0-9]\)\([^,]*\),;*\([0-9]\)\([0-9]*\);*0*/\1,\2\3,\4;0/
+t right1
+s/.\([^,]*\),~\(.*\);0~s\(-*\)o-*/\1~\30\2~/
+
+: addsub1
+ s/\(.\{0,1\}\)\(~[^,]*\)\([0-9]\)\(\.*\),\([^;]*\)\(;\([^;]*\(\3[^;]*\)\).*X*\1\(.*\)\)/\2,\4\5\9\8\7\6/
+ s/,\([^~]*~\).\{10\}\(.\)[^;]\{0,9\}\([^;]\{0,1\}\)[^;]*/,\2\1\3/
+# could be done in one s/// if we could have >9 back-refs...
+/^~.*~;/!b addsub1
+# endbin: keep the computed result, append the state saved in hold space, and strip the two original operands from it. Falls through to normal.
+: endbin
+s/.\([^,]*\),\([0-9.]*\).*/\1\2/
+G
+s/\n[^~]*~[^~]*//
+# normal: drop leading zeros and anything after the last digit of the top number; a top item containing no digit 1-9 becomes 0.
+: normal
+s/^\(-*\)0*\([0-9.]*[0-9]\)[^~]*/\1\2/
+s/^[^1-9~]*~/0~/
+b next
+# mul ("*"): reached from binop; builds the product and finishes in endbin. NOTE(review): the digit-by-digit steps (mul1-mul3) were not traced.
+: mul
+s/\(-*\)\([0-9]*\)\.*\([0-9]*\)~\(-*\)\([0-9]*\)\.*\([0-9]*\).*|K\([^|]*\).*/\1\4\2\5.!\3\6,|\2<\3~\5>\6:\7;9876543210009909/
+
+: mul1
+ s/![0-9]\([^<]*\)<\([0-9]\{0,1\}\)\([^>]*\)>\([0-9]\{0,1\}\)/0!\1\2<\3\4>/
+ /![0-9]/ s/\(:[^;]*\)\([1-9]\)\(0*\)\([^0]*\2\(.\).*X*\3\(9*\)\)/\1\5\6\4/
+/<~[^>]*>:0*;/!t mul1
+
+s/\(-*\)\1\([^>]*\).*/;\2^>:9876543210aaaaaaaaa/
+
+: mul2
+ s/\([0-9]~*\)^/^\1/
+ s/<\([0-9]*\)\(.*[~^]\)\([0-9]*\)>/\1<\2>\3/
+
+ : mul3
+ s/>\([0-9]\)\(.*\1.\{9\}\(a*\)\)/\1>\2;9\38\37\36\35\34\33\32\31\30/
+ s/\(;[^<]*\)\([0-9]\)<\([^;]*\).*\2[0-9]*\(.*\)/\4\1<\2\3/
+ s/a[0-9]/a/g
+ s/a\{10\}/b/g
+ s/b\{10\}/c/g
+ /|0*[1-9][^>]*>0*[1-9]/b mul3
+
+ s/;/a9876543210;/
+ s/a.\{9\}\(.\)[^;]*\([^,]*\)[0-9]\([.!]*\),/\2,\1\3/
+ y/cb/ba/
+/|<^/!b mul2
+b endbin
+# div ("/"): prints "divide by 0" and pops when the top item has no leading non-zero digit; otherwise rescales the operands and queues dc code after the operator.
+: div
+# CDDET
+/^[-.0]*[1-9]/ !i\
+divide by 0
+//!b pop
+s/\(-*\)\([0-9]*\)\.*\([^~]*~-*\)\([0-9]*\)\.*\([^~]*\)/\2.\3\1;0\4.\5;0/
+: div1
+ s/^\.0\([^.]*\)\.;*\([0-9]\)\([0-9]*\);*0*/.\1\2.\3;0/
+ s/^\([^.]*\)\([0-9]\)\.\([^;]*;\)0*\([0-9]*\)\([0-9]\)\./\1.\2\30\4.\5/
+t div1
+s/~\(-*\)\1\(-*\);0*\([^;]*[0-9]\)[^~]*/~123456789743222111~\2\3/
+s/\(.\(.\)[^~]*\)[^9]*\2.\{8\}\(.\)[^~]*/\3~\1/
+s,|?.,&SaSadSaKdlaZ+LaX-1+[sb1]Sbd1>bkLatsbLa[dSa2lbla*-*dLa!=a]dSaxsakLasbLb*t,
+b next
+# rem ("%"): queues dc code after the operator; it reads as a - (a/b)*b with the scale saved and restored (NOTE(review): macro read, not run).
+: rem
+s,|?%,&Sadla/LaKSa[999]k*Lak-,
+b next
+# exp ("^"): warns when the top item contains a "."; then expands its digits through the lookup string into dc code queued after the operator (NOTE(review): expansion not traced).
+: exp
+# This decimal method is just a little faster than the binary method done
+# totally in dc: 1LaKLb [kdSb*LbK]Sb [[.5]*d0ktdSa<bkd*KLad1<a]Sa d1<a kk*
+/^[^~]*\./i\
+fraction in exponent ignored
+s,[^-0-9].*,;9d**dd*8*d*d7dd**d*6d**d5d*d*4*d3d*2lbd**1lb*0,
+: exp1
+ s/\([0-9]\);\(.*\1\([d*]*\)[^l]*\([^*]*\)\(\**\)\)/;dd*d**d*\4\3\5\2/
+t exp1
+G
+s,-*.\{9\}\([^9]*\)[^0]*0.\(.*|?.\),\2~saSaKdsaLb0kLbkK*+k1\1LaktsbkLax,
+s,|?.,&SadSbdXSaZla-SbKLaLadSb[0Lb-d1lb-*d+K+0kkSb[1Lb/]q]Sa0>a[dk]sadK<a[Lb],
+b next
+# sqrt ("v"): warns on a negative operand; operands starting with - or 0 are left as they are; otherwise a shortened-digit first guess is pushed and dc code is queued to refine it.
+: sqrt
+# first square root using sed: 8k2v at 1:30am Dec 17, 1996
+/^-/i\
+square root of negative number
+/^[-0]/b next
+s/~.*//
+/^\./ s/0\([0-9]\)/\1/g
+/^\./ !s/[0-9][0-9]/7/g
+G
+s/\n/~/
+s,|?.,&K1+k KSbSb[dk]SadXdK<asadlb/lb+[.5]*[sbdlb/lb+[.5]*dlb>a]dsaxsasaLbsaLatLbk K1-kt,
+b next
+
+# END OF GSU dc.sed
diff --git a/testsuite/distrib.good b/testsuite/distrib.good
new file mode 100644
index 0000000..a6a8426
--- /dev/null
+++ b/testsuite/distrib.good
@@ -0,0 +1,29 @@
+Path: mailnewsgateway
+From crash@cygnus.com Wed Mar 8 18: 02:42 1995
+From: crash@cygnus.com (Jason Molenda)
+Message-ID: <9503090202.AA06931.alt.buddha.fat.short.guy@phydeaux.cygnus.com>
+Subject: Note for sed testsuite
+Original-To: molenda@msi.umn.edu
+Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST)
+X-Mailer: ELM [version 2.4 PL23]
+Newsgroups: alt.buddha.short.fat.guy
+Distribution: world
+Sender: news@cygnus.com
+Approved: alt.buddha.short.fat.guy@cygnus.com
+
+ _Summum Bonum_
+
+ All the breath and the bloom of the
+ year in the bag of one bee:
+ All the wonder and wealth of the mine in
+ the heart of one gem:
+ In the core of one pearl all the shade and the
+ shine of the sea:
+ Breath and bloom, shade and shine, -- wonder,
+ wealth, and -- how far above them --
+ Truth, thats brighter than gem,
+ Trust, that's purer than pearl, --
+ Brightest truth, purest trust in the universe --
+ all were for me
+ In the kiss of one girl.
+ -- Robert Browning
diff --git a/testsuite/distrib.inp b/testsuite/distrib.inp
new file mode 100644
index 0000000..ceaecec
--- /dev/null
+++ b/testsuite/distrib.inp
@@ -0,0 +1,28 @@
+From crash@cygnus.com Wed Mar 8 18:02:42 1995
+Received: from s1.msi.umn.edu (s1.msi.umn.edu [128.101.24.1]) by cygnus.com (8.6.9/8.6.9) with ESMTP id SAA21692 for <crash@cygnus.com>; Wed, 8 Mar 1995 18:02:41 -0800
+Received: from cygint.cygnus.com (cygint.cygnus.com [140.174.1.1]) by s1.msi.umn.edu (8.6.10/8.6.9) with ESMTP id TAA13398 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 19:59:18 -0600
+Received: from phydeaux.cygnus.com (phydeaux.cygnus.com [140.174.1.85]) by cygnus.com (8.6.9/8.6.9) with SMTP id SAA21688 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 18:02:33 -0800
+From: Jason Molenda <crash@cygnus.com>
+Received: by phydeaux.cygnus.com (5.65/4.7) id AA06931; Wed, 8 Mar 1995 18:02:28 -0800
+Message-Id: <9503090202.AA06931@phydeaux.cygnus.com>
+Subject: Note for sed testsuite
+To: molenda@msi.umn.edu
+Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST)
+X-Mailer: ELM [version 2.4 PL23]
+
+ _Summum Bonum_
+
+ All the breath and the bloom of the
+ year in the bag of one bee:
+ All the wonder and wealth of the mine in
+ the heart of one gem:
+ In the core of one pearl all the shade and the
+ shine of the sea:
+ Breath and bloom, shade and shine, -- wonder,
+ wealth, and -- how far above them --
+ Truth, thats brighter than gem,
+ Trust, that's purer than pearl, --
+ Brightest truth, purest trust in the universe --
+ all were for me
+ In the kiss of one girl.
+ -- Robert Browning
diff --git a/testsuite/distrib.sed b/testsuite/distrib.sed
new file mode 100644
index 0000000..918b30f
--- /dev/null
+++ b/testsuite/distrib.sed
@@ -0,0 +1,56 @@
+# This is straight out of C News
+#
+#
+# All this does is massage the headers so they look like what news
+# software expects. To:, Cc: and Resent-*: headers are masked.
+# In-Reply-To: is turned into References:, which is questionable (could
+# just as well be dropped).
+#
+# The From: line is rewritten to use the "address (comments)" form
+# instead of "phrase <route>" form our mailer uses. Also, addresses
+# with no "@domainname" are assumed to originate locally, and so are
+# given a domain.
+#
+# The Sender: field below reflects the address of the person who
+# maintains our mailing lists. The Approved: field is in a special
+# form, so that we can do bidirectional gatewaying. Any message
+# in a newsgroup that bears this stamp will not be fed into the
+# matching mailing list.
+# Labels: a = process one header line; r = blank a dropped header and skip its indented continuations; e = copy the body after the first empty line. (NOTE(review): the explicit p commands suggest it is run with -n -- confirm.)
+1i\
+Path: mailnewsgateway
+ :a
+ /^[Rr]eceived:/b r
+ /^[Nn]ewsgroups:/b r
+ /^[Pp]ath:/b r
+ /^[Tt][Oo]:/s/^/Original-/
+ /^[Cc][Cc]:/s/^/Original-/
+ /^[Rr][Ee][Ss][Ee][Nn][Tt]-.*/s/^/Original-/
+ /^[Mm][Ee][Ss][Ss][Aa][Gg][Ee]-[Ii][Dd]:/s/@/.alt.buddha.fat.short.guy@/
+ s/^[Ii]n-[Rr]eply-[Tt]o:/References:/
+ /^From:/{
+ s/<\([^@]*\)>$/<\1@$thissite>/
+ s/^From:[ ][ ]*\(.*\) *<\(.*\)>$/From: \2 (\1)/
+ }
+ s/-[Ii]d:/-ID:/
+ s/^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]:[ ]*$/Subject: (none)/
+ s/^\([^:]*:\)[ ]*/\1 /
+ /^$/{i\
+Newsgroups: alt.buddha.short.fat.guy\
+Distribution: world\
+Sender: news@cygnus.com\
+Approved: alt.buddha.short.fat.guy@cygnus.com
+ b e
+ }
+ p
+ n
+ b a
+ :r
+ s/.*//g
+ n
+ /^[ ]/b r
+ b a
+ :e
+ p
+ n
+ b e
diff --git a/testsuite/distrib.sh b/testsuite/distrib.sh
new file mode 100644
index 0000000..dbadbdc
--- /dev/null
+++ b/testsuite/distrib.sh
@@ -0,0 +1,63 @@
+#! /bin/sh
+#
+# This is stolen from C News
+#
+
+
+#
+# All this does is massage the headers so they look like what news
+# software expects. To:, Cc: and Resent-*: headers are masked.
+# In-Reply-To: is turned into References:, which is questionable (could
+# just as well be dropped).
+#
+# The From: line is rewritten to use the "address (comments)" form
+# instead of "phrase <route>" form our mailer uses. Also, addresses
+# with no "@domainname" are assumed to originate locally, and so are
+# given a domain.
+#
+# The Sender: field below reflects the address of the person who
+# maintains our mailing lists. The Approved: field is in a special
+# form, so that we can do bidirectional gatewaying. Any message
+# in a newsgroup that bears this stamp will not be fed into the
+# matching mailing list.
+
+sed=${1-sed}
+
+$sed -n -e "1{i\\
+Path: mailnewsgateway
+ }" \
+ -e ":a
+ /^[Rr]eceived:/b r
+ /^[Nn]ewsgroups:/b r
+ /^[Pp]ath:/b r
+ /^[Tt][Oo]:/s/^/Original-/
+ /^[Cc][Cc]:/s/^/Original-/
+ /^[Rr][Ee][Ss][Ee][Nn][Tt]-.*/s/^/Original-/
+ /^[Mm][Ee][Ss][Ss][Aa][Gg][Ee]-[Ii][Dd]:/s/@/.alt.buddha.fat.short.guy@/
+ s/^[Ii]n-[Rr]eply-[Tt]o:/References:/
+ /^From:/{
+ s/<\([^@]*\)>\$/<\1@$thissite>/
+ s/^From:[ ][ ]*\(.*\) *<\(.*\)>\$/From: \2 (\1)/
+ }
+ s/-[Ii]d:/-ID:/
+ s/^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]:[ ]*$/Subject: (none)/
+ s/^\([^:]*:\)[ ]*/\1 /
+ /^\$/{i\\
+Newsgroups: alt.buddha.short.fat.guy\\
+Distribution: world\\
+Sender: news@cygnus.com\\
+Approved: alt.buddha.short.fat.guy@cygnus.com
+ b e
+ }
+ p
+ n
+ b a
+ :r
+ s/.*//g
+ n
+ /^[ ]/b r
+ b a
+ :e
+ p
+ n
+ b e"
diff --git a/testsuite/dollar.good b/testsuite/dollar.good
new file mode 100644
index 0000000..23e072a
--- /dev/null
+++ b/testsuite/dollar.good
@@ -0,0 +1,4 @@
+I can't quite remember where I heard it,
+but I can't seem to get out of my head
+the phrase
+space the final frontier
diff --git a/testsuite/dollar.inp b/testsuite/dollar.inp
new file mode 100644
index 0000000..9267e98
--- /dev/null
+++ b/testsuite/dollar.inp
@@ -0,0 +1,4 @@
+I can't quite remember where I heard it,
+but I can't seem to get out of my head
+the phrase
+the final frontier
diff --git a/testsuite/dollar.sed b/testsuite/dollar.sed
new file mode 100644
index 0000000..5fbb35c
--- /dev/null
+++ b/testsuite/dollar.sed
@@ -0,0 +1 @@
+$s/^/space /
diff --git a/testsuite/empty.good b/testsuite/empty.good
new file mode 100644
index 0000000..07e1a15
--- /dev/null
+++ b/testsuite/empty.good
@@ -0,0 +1,2 @@
+x
+
diff --git a/testsuite/empty.inp b/testsuite/empty.inp
new file mode 100644
index 0000000..07e1a15
--- /dev/null
+++ b/testsuite/empty.inp
@@ -0,0 +1,2 @@
+x
+
diff --git a/testsuite/empty.sed b/testsuite/empty.sed
new file mode 100644
index 0000000..b35aed6
--- /dev/null
+++ b/testsuite/empty.sed
@@ -0,0 +1 @@
+s/^ *//
diff --git a/testsuite/enable.good b/testsuite/enable.good
new file mode 100644
index 0000000..c6588ba
--- /dev/null
+++ b/testsuite/enable.good
@@ -0,0 +1,3 @@
+targets
+x11-testing
+wollybears-in-minnesota
diff --git a/testsuite/enable.inp b/testsuite/enable.inp
new file mode 100644
index 0000000..4509a8d
--- /dev/null
+++ b/testsuite/enable.inp
@@ -0,0 +1,3 @@
+--enable-targets=sparc-sun-sunos4.1.3,srec
+--enable-x11-testing=on
+--enable-wollybears-in-minnesota=yes-id-like-that
diff --git a/testsuite/enable.sed b/testsuite/enable.sed
new file mode 100644
index 0000000..0d2a208
--- /dev/null
+++ b/testsuite/enable.sed
@@ -0,0 +1,2 @@
+# Inspired by an autoconf-generated configure script: reduce "--enable-FEATURE=VALUE" to just FEATURE.
+s/-*enable-//;s/=.*//
diff --git a/testsuite/eval.good b/testsuite/eval.good
new file mode 100644
index 0000000..6fd021b
--- /dev/null
+++ b/testsuite/eval.good
@@ -0,0 +1,40 @@
+abcd
+---
+abcd
+---
+abcd
+---
+17380: 2 2 5 11 79
+cpu
+---
+17380: 2 2 5 11 79
+cpu
+---
+17380: 2 2 5 11 79
+cpu
+---
+ abcd
+---
+ abcd
+---
+ abcd
+---
+17380: 2 2 5 11 79
+ cpu
+---
+17380: 2 2 5 11 79
+ cpu
+---
+17380: 2 2 5 11 79
+ cpu
+---
+Doing some more tests -----------------------
+17380: 2 2 5 11 79
+---
+../sed/sed 1q eval.in2
+---
+17380: 2 2 5 11 79
+---
+../sed/sed 1q eval.in2
+---
+../sed/sed 1q eval.in2
diff --git a/testsuite/eval.in2 b/testsuite/eval.in2
new file mode 100644
index 0000000..4e30989
--- /dev/null
+++ b/testsuite/eval.in2
@@ -0,0 +1,5 @@
+17380: 2 2 5 11 79
+abcd
+cpu
+ abcd
+ cpu
diff --git a/testsuite/eval.inp b/testsuite/eval.inp
new file mode 100644
index 0000000..4e30989
--- /dev/null
+++ b/testsuite/eval.inp
@@ -0,0 +1,5 @@
+17380: 2 2 5 11 79
+abcd
+cpu
+ abcd
+ cpu
diff --git a/testsuite/eval.sed b/testsuite/eval.sed
new file mode 100644
index 0000000..5734786
--- /dev/null
+++ b/testsuite/eval.sed
@@ -0,0 +1,46 @@
+1d
+# Exercises the GNU "e" command and the e flag of s; the first input line is skipped by the 1d above.
+ # "e COMMAND" form, only on lines containing cpu: run the command and emit its output ahead of the line
+ /cpu/!b2
+ e../sed/sed 1q eval.in2
+
+:2
+p
+i---
+h
+
+ # s///e on a cpu line: the substituted pattern space is executed and replaced by the command output
+ s,.* *cpu *,../sed/sed 1q eval.in2; echo "&",e
+
+:3
+p
+g
+i---
+
+ h
+ # s///ep on a cpu line: execute as above, then print the result; g restores the saved line
+ s,.* *cpu.*,../sed/sed 1q eval.in2,ep
+ g
+
+
+:4
+p
+i---
+
+$!d
+
+# Last line only: flag order ep versus pe, a bare "e" (runs the pattern space), and an s///e that does not match
+s/.*/Doing some more tests -----------------------/p
+s,.*,../sed/sed 1q eval.in2,ep
+i---
+s,.*,../sed/sed 1q eval.in2,pe
+i---
+s,.*,../sed/sed 1q eval.in2,
+h
+e
+p
+g
+i---
+s/^/echo /ep
+i---
+s/^fubar$/echo wozthis/e
diff --git a/testsuite/factor.good b/testsuite/factor.good
new file mode 100644
index 0000000..c703182
--- /dev/null
+++ b/testsuite/factor.good
@@ -0,0 +1,15 @@
+2
+3
+2
+2
+5
+2
+2
+2
+11
+2
+2
+2
+2
+13
+11
diff --git a/testsuite/factor.inp b/testsuite/factor.inp
new file mode 100644
index 0000000..1c2e796
--- /dev/null
+++ b/testsuite/factor.inp
@@ -0,0 +1,8 @@
+2
+3
+4
+5
+8
+11
+16
+143
diff --git a/testsuite/factor.sed b/testsuite/factor.sed
new file mode 100644
index 0000000..4416e35
--- /dev/null
+++ b/testsuite/factor.sed
@@ -0,0 +1,76 @@
+#! /bin/sed -nf
+# Print the prime factors of each input number, one per line; numbers are handled in unary as runs of "a".
+s/.*/&;9aaaaaaaaa8aaaaaaaa7aaaaaaa6aaaaaa5aaaaa4aaaa3aaa2aa1a0/
+:encode
+s/\(a*\)\([0-9]\)\([0-9]*;.*\2\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4\3/
+tencode
+s/;.*//
+
+# Try the small factors 7, 5, 3 and 2 first for speed. The t clears the subst flag (set by the s above) and jumps to 7a
+t7a
+
+# These are placed here to make the flow harder to understand :-)
+:2
+a\
+2
+b2a
+:3
+a\
+3
+b3a
+:5
+a\
+5
+b5a
+:7
+a\
+7
+
+:7a
+s/^\(aa*\)\1\{6\}$/\1/
+t7
+:5a
+s/^\(aa*\)\1\{4\}$/\1/
+t5
+:3a
+s/^\(aa*\)\1\1$/\1/
+t3
+:2a
+s/^\(aa*\)\1$/\1/
+t2
+
+/^a$/b
+
+# The quotient of dividing by 11 is a limit to the remaining prime factors
+s/^\(aa*\)\1\{10\}/\1=&/
+
+# Pattern space looks like CANDIDATE=NUMBER. When a candidate is valid,
+# the number is divided and the candidate is tried again
+:factor
+/^\(a\{7,\}\)=\1\1*$/! {
+ # Decrement CANDIDATE, and search again if it is still >1
+ s/^a//
+ /^aa/b factor
+
+ # Print the last remaining factor: since it is stored in the NUMBER
+ # rather than in the CANDIDATE, swap 'em: now NUMBER=1
+ s/\(.*\)=\(.*\)/\2=\1/
+}
+
+# We have a prime factor in CANDIDATE! Print it
+h
+s/=.*/;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9/
+
+:decode
+s/^\(a*\)\1\{9\}\(a\{0,9\}\)\([0-9]*;.*[^a]\2\([0-9]\)\)/\1\4\3/
+/^a/tdecode
+s/;.*//p
+
+g
+:divide
+s/^\(a*\)\(=b*\)\1/\1\2b/
+tdivide
+y/b/a/
+
+# Keep factoring while NUMBER is still at least 2 (ends in aa); if NUMBER = 1, we don't have any more factors
+/aa$/bfactor
diff --git a/testsuite/fasts.good b/testsuite/fasts.good
new file mode 100644
index 0000000..d1c7e4a
--- /dev/null
+++ b/testsuite/fasts.good
@@ -0,0 +1,14 @@
+aaaaaabbbbbbaaaaaaa
+bbbbbb
+aaaaaabbbbbbaaaaaaa
+aaaaaabbbbbbaaaaaaa
+aaaaaaabbbbbbaaaaaaa
+aaaaaabbbbbbaaaaaaa
+aaaaaaabbbbbbaaaaaa
+bbbbbbbbbbbbbbbbbbb
+
+bbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbb
+bbbbbbbbbbbbbbbbbbbb
diff --git a/testsuite/fasts.inp b/testsuite/fasts.inp
new file mode 100644
index 0000000..361e17b
--- /dev/null
+++ b/testsuite/fasts.inp
@@ -0,0 +1 @@
+aaaaaaabbbbbbaaaaaaa
diff --git a/testsuite/fasts.sed b/testsuite/fasts.sed
new file mode 100644
index 0000000..5e482f7
--- /dev/null
+++ b/testsuite/fasts.sed
@@ -0,0 +1,46 @@
+# test `fast' substitutions
+# Each case applies one simple s command to the line saved by h, prints the result, and restores the line with g.
+h
+s/a//
+p
+g
+s/a//g
+p
+g
+s/^a//p
+g
+s/^a//g
+p
+g
+s/not present//g
+p
+g
+s/^[a-z]//g
+p
+g
+s/a$//
+p
+g
+# Second pass: turn every a into b, save that line, and repeat the same kinds of substitution on it.
+y/a/b/
+h
+s/b//
+p
+g
+s/b//g
+p
+g
+s/^b//p
+g
+s/^b//g
+p
+g
+s/^[a-z]//g
+p
+g
+s/b$//
+p
+g
+
+
+
diff --git a/testsuite/flipcase.good b/testsuite/flipcase.good
new file mode 100644
index 0000000..9fcffa2
--- /dev/null
+++ b/testsuite/flipcase.good
@@ -0,0 +1,25 @@
+09 - 02 - 2002 00.00 Tg La7 La7 -
+09 - 02 - 2002 00.00 Brand New Tmc 2 -
+09 - 02 - 2002 00.10 Tg1 Notte Rai Uno -
+09 - 02 - 2002 00.15 Tg Parlamento Rai Due -
+09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 -
+09 - 02 - 2002 00.20 Berserk - La Confessione Di Gatz Italia 1 Cartoon
+09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai Tre -
+09 - 02 - 2002 00.25 Meteo 2 Rai Due -
+09 - 02 - 2002 00.30 Appuntamento Al Cinema Rai Due -
+09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre -
+09 - 02 - 2002 00.35 Profiler Rai Due -
+09 - 02 - 2002 00.35 Stampa Oggi - Che Tempo Fa Rai Uno -
+09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno -
+09 - 02 - 2002 00.45 Bollettino Della Neve Rete 4 News
+09 - 02 - 2002 00.50 Studio Aperto - La Giornata Italia 1 News
+09 - 02 - 2002 00.50 Bocca A Bocca - 2 Tempo Rete 4 Film
+09 - 02 - 2002 01.00 Appuntamento Al Cinema Rai Tre -
+09 - 02 - 2002 01.00 Music Non Stop Tmc 2 -
+09 - 02 - 2002 01.00 Studio Sport Italia 1 Sport
+09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News
+09 - 02 - 2002 01.05 Fuori Orario. Cose (Mai) Viste Rai Tre -
+09 - 02 - 2002 01.15 Rainotte Rai Due -
+09 - 02 - 2002 01.15 Sottovoce Rai Uno -
+09 - 02 - 2002 01.15 Giochi Olimpici Invernali - Cerimonia Di Apertura Rai Tre -
+09 - 02 - 2002 01.17 Italia Interroga Rai Due -
diff --git a/testsuite/flipcase.inp b/testsuite/flipcase.inp
new file mode 100644
index 0000000..f91ec11
--- /dev/null
+++ b/testsuite/flipcase.inp
@@ -0,0 +1,25 @@
+09 - 02 - 2002 00.00 Tg La7 La7 -
+09 - 02 - 2002 00.00 Brand New Tmc 2 -
+09 - 02 - 2002 00.10 Tg1 Notte Rai Uno -
+09 - 02 - 2002 00.15 Tg Parlamento Rai Due -
+09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 -
+09 - 02 - 2002 00.20 Berserk - La CoNFESSIONE Di Gatz Italia 1 Cartoon
+09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai TrE -
+09 - 02 - 2002 00.25 Meteo 2 Rai Due -
+09 - 02 - 2002 00.30 Appuntamento Al CinEMA RaI Due -
+09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre -
+09 - 02 - 2002 00.35 Profiler Rai Due -
+09 - 02 - 2002 00.35 Stampa OggI - Che Tempo Fa Rai Uno -
+09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno -
+09 - 02 - 2002 00.45 BollettINO Della NEVE RETE 4 News
+09 - 02 - 2002 00.50 STUDIO Aperto - La Giornata Italia 1 News
+09 - 02 - 2002 00.50 BOCCA A Bocca - 2 Tempo Rete 4 Film
+09 - 02 - 2002 01.00 AppuntAMENTO Al Cinema Rai Tre -
+09 - 02 - 2002 01.00 Music NoN Stop Tmc 2 -
+09 - 02 - 2002 01.00 Studio SpORT Italia 1 SporT
+09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News
+09 - 02 - 2002 01.05 Fuori Orario. CosE (Mai) Viste Rai Tre -
+09 - 02 - 2002 01.15 RAINOTTE Rai Due -
+09 - 02 - 2002 01.15 Sottovoce Rai Uno -
+09 - 02 - 2002 01.15 GiOCHI Olimpici InVERNALI - CERIMONIA Di Apertura Rai Tre -
+09 - 02 - 2002 01.17 Italia Interroga Rai Due -
diff --git a/testsuite/flipcase.sed b/testsuite/flipcase.sed
new file mode 100644
index 0000000..211d0d0
--- /dev/null
+++ b/testsuite/flipcase.sed
@@ -0,0 +1 @@
+s,\([^A-Za-z]*\)\([A-Za-z]*\),\1\L\u\2,g
\ No newline at end of file
diff --git a/testsuite/head.good b/testsuite/head.good
new file mode 100644
index 0000000..6392831
--- /dev/null
+++ b/testsuite/head.good
@@ -0,0 +1,3 @@
+ "...by imposing a tiny bit of order in a communication you are
+ translating, you are carving out a little bit of order in the
+ universe. You will never succeed. Everything will fail and come
diff --git a/testsuite/head.inp b/testsuite/head.inp
new file mode 100644
index 0000000..5c4b4a4
--- /dev/null
+++ b/testsuite/head.inp
@@ -0,0 +1,9 @@
+ "...by imposing a tiny bit of order in a communication you are
+ translating, you are carving out a little bit of order in the
+ universe. You will never succeed. Everything will fail and come
+ to an end finally. But you have a chance to carve a little bit
+ of order and maybe even beauty out of the raw materials that
+ surround you everywhere, and I think there is no greater meaning
+ in life."
+
+ Donald L. Philippi, Oct 1930 - Jan 1993
diff --git a/testsuite/head.sed b/testsuite/head.sed
new file mode 100644
index 0000000..d8ea37d
--- /dev/null
+++ b/testsuite/head.sed
@@ -0,0 +1 @@
+3q
diff --git a/testsuite/inclib.good b/testsuite/inclib.good
new file mode 100644
index 0000000..6b1279a
--- /dev/null
+++ b/testsuite/inclib.good
@@ -0,0 +1,34 @@
+ /usr/X11R6/include
+ /usr/X11R5/include
+ /usr/X11R4/include
+
+ /usr/include/X11R6
+ /usr/include/X11R5
+ /usr/include/X11R4
+
+ /usr/local/X11R6/include
+ /usr/local/X11R5/include
+ /usr/local/X11R4/include
+
+ /usr/local/include/X11R6
+ /usr/local/include/X11R5
+ /usr/local/include/X11R4
+
+ /usr/X11/include
+ /usr/include/X11
+ /usr/local/X11/include
+ /usr/local/include/X11
+
+ /usr/X386/include
+ /usr/x386/include
+ /usr/XFree86/include/X11
+
+ /usr/include
+ /usr/local/include
+ /usr/unsupported/include
+ /usr/athena/include
+ /usr/local/x11r5/include
+ /usr/lpp/Xamples/include
+
+ /usr/openwin/include
+ /usr/openwin/share/include
diff --git a/testsuite/inclib.inp b/testsuite/inclib.inp
new file mode 100644
index 0000000..552e9e2
--- /dev/null
+++ b/testsuite/inclib.inp
@@ -0,0 +1,34 @@
+ /usr/X11R6/lib
+ /usr/X11R5/lib
+ /usr/X11R4/lib
+
+ /usr/lib/X11R6
+ /usr/lib/X11R5
+ /usr/lib/X11R4
+
+ /usr/local/X11R6/lib
+ /usr/local/X11R5/lib
+ /usr/local/X11R4/lib
+
+ /usr/local/lib/X11R6
+ /usr/local/lib/X11R5
+ /usr/local/lib/X11R4
+
+ /usr/X11/lib
+ /usr/lib/X11
+ /usr/local/X11/lib
+ /usr/local/lib/X11
+
+ /usr/X386/lib
+ /usr/x386/lib
+ /usr/XFree86/lib/X11
+
+ /usr/lib
+ /usr/local/lib
+ /usr/unsupported/lib
+ /usr/athena/lib
+ /usr/local/x11r5/lib
+ /usr/lpp/Xamples/lib
+
+ /usr/openwin/lib
+ /usr/openwin/share/lib
diff --git a/testsuite/inclib.sed b/testsuite/inclib.sed
new file mode 100644
index 0000000..528f158
--- /dev/null
+++ b/testsuite/inclib.sed
@@ -0,0 +1,2 @@
+# inspired by an autoconf generated configure script.
+s;lib;include;
diff --git a/testsuite/insens.good b/testsuite/insens.good
new file mode 100644
index 0000000..6fd1bc1
--- /dev/null
+++ b/testsuite/insens.good
@@ -0,0 +1,2 @@
+1.2.3
+1.2.3
diff --git a/testsuite/insens.inp b/testsuite/insens.inp
new file mode 100644
index 0000000..baefc12
--- /dev/null
+++ b/testsuite/insens.inp
@@ -0,0 +1 @@
+Version: 1.2.3
diff --git a/testsuite/insens.sed b/testsuite/insens.sed
new file mode 100644
index 0000000..afab9fa
--- /dev/null
+++ b/testsuite/insens.sed
@@ -0,0 +1,4 @@
+h
+s/Version: *//p
+g
+s/version: *//Ip
diff --git a/testsuite/khadafy.good b/testsuite/khadafy.good
new file mode 100644
index 0000000..e719f4e
--- /dev/null
+++ b/testsuite/khadafy.good
@@ -0,0 +1,32 @@
+1) Muammar Qaddafi
+2) Mo'ammar Gadhafi
+3) Muammar Kaddafi
+4) Muammar Qadhafi
+5) Moammar El Kadhafi
+6) Muammar Gadafi
+7) Mu'ammar al-Qadafi
+8) Moamer El Kazzafi
+9) Moamar al-Gaddafi
+10) Mu'ammar Al Qathafi
+11) Muammar Al Qathafi
+12) Mo'ammar el-Gadhafi
+13) Moamar El Kadhafi
+14) Muammar al-Qadhafi
+15) Mu'ammar al-Qadhdhafi
+16) Mu'ammar Qadafi
+17) Moamar Gaddafi
+18) Mu'ammar Qadhdhafi
+19) Muammar Khaddafi
+20) Muammar al-Khaddafi
+21) Mu'amar al-Kadafi
+22) Muammar Ghaddafy
+23) Muammar Ghadafi
+24) Muammar Ghaddafi
+25) Muamar Kaddafi
+26) Muammar Quathafi
+27) Muammar Gheddafi
+28) Muamar Al-Kaddafi
+29) Moammar Khadafy
+30) Moammar Qudhafi
+31) Mu'ammar al-Qaddafi
+32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi
diff --git a/testsuite/khadafy.inp b/testsuite/khadafy.inp
new file mode 100644
index 0000000..e719f4e
--- /dev/null
+++ b/testsuite/khadafy.inp
@@ -0,0 +1,32 @@
+1) Muammar Qaddafi
+2) Mo'ammar Gadhafi
+3) Muammar Kaddafi
+4) Muammar Qadhafi
+5) Moammar El Kadhafi
+6) Muammar Gadafi
+7) Mu'ammar al-Qadafi
+8) Moamer El Kazzafi
+9) Moamar al-Gaddafi
+10) Mu'ammar Al Qathafi
+11) Muammar Al Qathafi
+12) Mo'ammar el-Gadhafi
+13) Moamar El Kadhafi
+14) Muammar al-Qadhafi
+15) Mu'ammar al-Qadhdhafi
+16) Mu'ammar Qadafi
+17) Moamar Gaddafi
+18) Mu'ammar Qadhdhafi
+19) Muammar Khaddafi
+20) Muammar al-Khaddafi
+21) Mu'amar al-Kadafi
+22) Muammar Ghaddafy
+23) Muammar Ghadafi
+24) Muammar Ghaddafi
+25) Muamar Kaddafi
+26) Muammar Quathafi
+27) Muammar Gheddafi
+28) Muamar Al-Kaddafi
+29) Moammar Khadafy
+30) Moammar Qudhafi
+31) Mu'ammar al-Qaddafi
+32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi
diff --git a/testsuite/khadafy.sed b/testsuite/khadafy.sed
new file mode 100644
index 0000000..8ac81c0
--- /dev/null
+++ b/testsuite/khadafy.sed
@@ -0,0 +1,2 @@
+# The Khadafy test is brought to you by Scott Anderson . . .
+/M[ou]'\{0,1\}am\{1,2\}[ae]r .*\([AEae]l[- ]\)\{0,1\}[GKQ]h\{0,1\}[aeu]\{1,\}\([dtz][dhz]\{0,1\}\)\{1,\}af[iy]/!d
diff --git a/testsuite/linecnt.good b/testsuite/linecnt.good
new file mode 100644
index 0000000..3cc1bd6
--- /dev/null
+++ b/testsuite/linecnt.good
@@ -0,0 +1,110 @@
+1
+A dialogue on poverty
+2
+
+3
+ On the night when the rain beats,
+4
+ Driven by the wind,
+5
+ On the night when the snowflakes mingle
+6
+ With a sleety rain,
+7
+ I feel so helplessly cold.
+8
+ I nibble at a lump of salt,
+9
+ Sip the hot, oft-diluted dregs of _sake_;
+10
+ And coughing, snuffling,
+11
+ And stroking my scanty beard,
+12
+ I say in my pride,
+13
+ "There's none worthy, save I!"
+14
+ But I shiver still with cold.
+15
+ I pull up my hempen bedclothes,
+16
+ Wear what few sleeveless clothes I have,
+17
+ But cold and bitter is the night!
+18
+ As for those poorer than myself,
+19
+ Their parents must be cold and hungry,
+20
+ Their wives and children beg and cry.
+21
+ Then, how do you struggle through life?
+22
+
+23
+ Wide as they call the heaven and earth,
+24
+ For me they have shrunk quite small;
+25
+ Bright though they call the sun and moon,
+26
+ They never shine for me.
+27
+ Is it the same with all men,
+28
+ Or for me alone?
+29
+ By rare chance I was born a man
+30
+ And no meaner than my fellows,
+31
+ But, wearing unwadded sleeveless clothes
+32
+ In tatters, like weeds waving in the sea,
+33
+ Hanging from my shoulders,
+34
+ And under the sunken roof,
+35
+ Within the leaning walls,
+36
+ Here I lie on straw
+37
+ Spread on bare earth,
+38
+ With my parents at my pillow,
+39
+ And my wife and children at my feet,
+40
+ All huddled in grief and tears.
+41
+ No fire sends up smoke
+42
+ At the cooking-place,
+43
+ And in the cauldron
+44
+ A spider spins its web.
+45
+ With not a grain to cook,
+46
+ We moan like the night thrush.
+47
+ Then, "to cut," as the saying is,
+48
+ "The ends of what is already too short,"
+49
+ The village headman comes,
+50
+ With rod in hand, to our sleeping place,
+51
+ Growling for his dues.
+52
+ Must it be so hopeless --
+53
+ The way of this world?
+54
+
+55
+ -- Yamanoue Okura
diff --git a/testsuite/linecnt.inp b/testsuite/linecnt.inp
new file mode 100644
index 0000000..9eb6070
--- /dev/null
+++ b/testsuite/linecnt.inp
@@ -0,0 +1,55 @@
+A dialogue on poverty
+
+ On the night when the rain beats,
+ Driven by the wind,
+ On the night when the snowflakes mingle
+ With a sleety rain,
+ I feel so helplessly cold.
+ I nibble at a lump of salt,
+ Sip the hot, oft-diluted dregs of _sake_;
+ And coughing, snuffling,
+ And stroking my scanty beard,
+ I say in my pride,
+ "There's none worthy, save I!"
+ But I shiver still with cold.
+ I pull up my hempen bedclothes,
+ Wear what few sleeveless clothes I have,
+ But cold and bitter is the night!
+ As for those poorer than myself,
+ Their parents must be cold and hungry,
+ Their wives and children beg and cry.
+ Then, how do you struggle through life?
+
+ Wide as they call the heaven and earth,
+ For me they have shrunk quite small;
+ Bright though they call the sun and moon,
+ They never shine for me.
+ Is it the same with all men,
+ Or for me alone?
+ By rare chance I was born a man
+ And no meaner than my fellows,
+ But, wearing unwadded sleeveless clothes
+ In tatters, like weeds waving in the sea,
+ Hanging from my shoulders,
+ And under the sunken roof,
+ Within the leaning walls,
+ Here I lie on straw
+ Spread on bare earth,
+ With my parents at my pillow,
+ And my wife and children at my feet,
+ All huddled in grief and tears.
+ No fire sends up smoke
+ At the cooking-place,
+ And in the cauldron
+ A spider spins its web.
+ With not a grain to cook,
+ We moan like the night thrush.
+ Then, "to cut," as the saying is,
+ "The ends of what is already too short,"
+ The village headman comes,
+ With rod in hand, to our sleeping place,
+ Growling for his dues.
+ Must it be so hopeless --
+ The way of this world?
+
+ -- Yamanoue Okura
diff --git a/testsuite/linecnt.sed b/testsuite/linecnt.sed
new file mode 100644
index 0000000..3134d36
--- /dev/null
+++ b/testsuite/linecnt.sed
@@ -0,0 +1 @@
+=
diff --git a/testsuite/mac-mf.good b/testsuite/mac-mf.good
new file mode 100644
index 0000000..9be165d
--- /dev/null
+++ b/testsuite/mac-mf.good
@@ -0,0 +1,200 @@
+## config:mac-pre.in
+## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree
+
+#
+# MPW-style lines for the MakeFile
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+
+#
+# End of MPW-style lines for MakeFile
+#
+
+WHAT = mac
+
+# Directory syntax Ä
+R=
+C=
+S=:
+U=:
+
+BUILDTOP = :::
+srcdir =
+
+# FIXME Ä This doesn't translate to MPW yet, srcdir must be same as objdir
+# File in object dir can come from either the current dir or srcdir
+#
+# . Ä . "{srcdir}"
+
+# Default rule that puts each file into separate segment
+
+.c.o Ä .c
+ {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o
+
+CPPFLAGS = -i {SRCTOP}:include -i {BUILDTOP}:include -i {SRCTOP}:include:krb5 -i {BUILDTOP}:include:krb5 -i {CIncludes}
+DEFS = {CPPFLAGS}
+CC = c
+LD = link
+# The funny quoting in the LDFLAGS is to avoid xxx.c.o being mangled by
+# mac-mf.sed into xxx.c.o
+LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o
+CCOPTS =
+LIBS =
+KRB5ROOT= @KRB5ROOT@
+KRB4=@KRB4@
+INSTALL=Duplicate -y
+INSTALL_PROGRAM=Duplicate -y
+INSTALL_DATA=Duplicate -y
+INSTALL_SETUID=Duplicate -y
+
+KRB5MANROOT = {KRB5ROOT}{S}man
+ADMIN_BINDIR = {KRB5ROOT}{S}admin
+SERVER_BINDIR = {KRB5ROOT}{S}sbin
+CLIENT_BINDIR = {KRB5ROOT}{S}bin
+ADMIN_MANDIR = {KRB5MANROOT}{S}man8
+SERVER_MANDIR = {KRB5MANROOT}{S}man8
+CLIENT_MANDIR = {KRB5MANROOT}{S}man1
+FILE_MANDIR = {KRB5MANROOT}{S}man5
+KRB5_LIBDIR = {KRB5ROOT}{S}lib
+KRB5_INCDIR = {KRB5ROOT}{S}include
+KRB5_INCSUBDIRS = ¶
+ {KRB5_INCDIR}{S}krb5 ¶
+ {KRB5_INCDIR}{S}asn.1 ¶
+ {KRB5_INCDIR}{S}kerberosIV
+
+
+RM = Delete -y -i
+CP = Duplicate -y
+MV = mv -f
+CHMOD=chmod
+RANLIB = @RANLIB@
+ARCHIVE = @ARCHIVE@
+ARADD = @ARADD@
+LN = @LN_S@
+AWK = @AWK@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+YACC = @YACC@
+
+# FIXME Ä This won't work for srcdir != objdir. But on the Mac, there
+# is no easy way to build a relative or absolute path, because Ä means
+# both the path separator, and the "go up a directory" indicator
+#SRCTOP = {srcdir}{S}{BUILDTOP}
+SRCTOP = {BUILDTOP}
+SUBDIRS = @subdirs@
+
+TOPLIBD = {BUILDTOP}{S}lib
+
+OBJEXT = c.o
+LIBEXT = a
+EXEEXT =
+
+all ÄÄ
+# Generated automatically from Makefile.in by configure
+CFLAGS = {CCOPTS} {DEFS} -i ::des
+
+##DOSBUILDTOP = ..\..\:
+##DOSLIBNAME=..\crypto.lib
+##DOS!include {BUILDTOP}\config\windows.in
+
+OBJS= md5.{OBJEXT} md5glue.{OBJEXT} md5crypto.{OBJEXT}
+
+SRCS= md5.c md5glue.c md5crypto.c
+
+all ÄÄ {OBJS}
+
+t_mddriver Ä t_mddriver.c.o md5.c.o
+ Link {LDFLAGS} -o t_mddriver t_mddriver.c.o md5.c.o
+
+t_mddriver.exe Ä
+ {CC} {CFLAGS2} -o t_mddriver.exe t_mddriver.c md5.c
+
+check ÄÄ t_mddriver{EXEEXT}
+ {C}t_mddriver{EXEEXT} -x
+
+clean ÄÄ
+ {RM} t_mddriver{EXEEXT} t_mddriver.{OBJEXT}
+# config:post.in
+# put all ÄÄ first just in case no other rules occur here
+#
+all ÄÄ
+
+check ÄÄ
+
+clean ÄÄ clean-{WHAT}
+ {RM} config.log pre.c.out post.c.out Makefile.c.out
+
+clean-unix ÄÄ
+ if test -n "{OBJS}" ; then {RM} {OBJS}; else Ä ; fi
+
+clean-windows ÄÄ
+ {RM} Å.{OBJEXT}
+ {RM} msvc.pdb Å.err
diff --git a/testsuite/mac-mf.inp b/testsuite/mac-mf.inp
new file mode 100644
index 0000000..3adaee2
--- /dev/null
+++ b/testsuite/mac-mf.inp
@@ -0,0 +1,200 @@
+## config/mac-pre.in
+## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree.
+
+#
+# MPW-style lines for the MakeFile.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+
+#
+# End of MPW-style lines for MakeFile.
+#
+
+WHAT = mac
+
+# Directory syntax:
+R=
+C=
+S=:
+U=:
+
+BUILDTOP = ../../..
+srcdir = .
+
+# FIXME: This doesn't translate to MPW yet, srcdir must be same as objdir.
+# File in object dir can come from either the current dir or srcdir.
+#
+# . : . "{srcdir}"
+
+# Default rule that puts each file into separate segment.
+
+.c.o: .c
+ {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o
+
+CPPFLAGS = -I$(SRCTOP)/include -I$(BUILDTOP)/include -I$(SRCTOP)/include/krb5 -I$(BUILDTOP)/include/krb5 -i {CIncludes}
+DEFS = $(CPPFLAGS)
+CC = c
+LD = link
+# The funny quoting in the LDFLAGS is to avoid xxx.o being mangled by
+# mac-mf.sed into xxx.c.o.
+LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o
+CCOPTS =
+LIBS =
+KRB5ROOT= @KRB5ROOT@
+KRB4=@KRB4@
+INSTALL=Duplicate -y
+INSTALL_PROGRAM=Duplicate -y
+INSTALL_DATA=Duplicate -y
+INSTALL_SETUID=Duplicate -y
+
+KRB5MANROOT = $(KRB5ROOT)$(S)man
+ADMIN_BINDIR = $(KRB5ROOT)$(S)admin
+SERVER_BINDIR = $(KRB5ROOT)$(S)sbin
+CLIENT_BINDIR = $(KRB5ROOT)$(S)bin
+ADMIN_MANDIR = $(KRB5MANROOT)$(S)man8
+SERVER_MANDIR = $(KRB5MANROOT)$(S)man8
+CLIENT_MANDIR = $(KRB5MANROOT)$(S)man1
+FILE_MANDIR = $(KRB5MANROOT)$(S)man5
+KRB5_LIBDIR = $(KRB5ROOT)$(S)lib
+KRB5_INCDIR = $(KRB5ROOT)$(S)include
+KRB5_INCSUBDIRS = \
+ $(KRB5_INCDIR)$(S)krb5 \
+ $(KRB5_INCDIR)$(S)asn.1 \
+ $(KRB5_INCDIR)$(S)kerberosIV
+
+
+RM = Delete -y -i
+CP = Duplicate -y
+MV = mv -f
+CHMOD=chmod
+RANLIB = @RANLIB@
+ARCHIVE = @ARCHIVE@
+ARADD = @ARADD@
+LN = @LN_S@
+AWK = @AWK@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+YACC = @YACC@
+
+# FIXME: This won't work for srcdir != objdir. But on the Mac, there
+# is no easy way to build a relative or absolute path, because : means
+# both the path separator, and the "go up a directory" indicator.
+#SRCTOP = $(srcdir)$(S)$(BUILDTOP)
+SRCTOP = $(BUILDTOP)
+SUBDIRS = @subdirs@
+
+TOPLIBD = $(BUILDTOP)$(S)lib
+
+OBJEXT = c.o
+LIBEXT = a
+EXEEXT =
+
+all::
+# Generated automatically from Makefile.in by configure.
+CFLAGS = $(CCOPTS) $(DEFS) -I$(srcdir)/../des
+
+##DOSBUILDTOP = ..\..\..
+##DOSLIBNAME=..\crypto.lib
+##DOS!include $(BUILDTOP)\config\windows.in
+
+OBJS= md5.$(OBJEXT) md5glue.$(OBJEXT) md5crypto.$(OBJEXT)
+
+SRCS= $(srcdir)/md5.c $(srcdir)/md5glue.c $(srcdir)/md5crypto.c
+
+all:: $(OBJS)
+
+t_mddriver: t_mddriver.o md5.o
+ $(CC) $(CFLAGS) $(LDFLAGS) -o t_mddriver t_mddriver.o md5.o
+
+t_mddriver.exe:
+ $(CC) $(CFLAGS2) -o t_mddriver.exe t_mddriver.c md5.c
+
+check:: t_mddriver$(EXEEXT)
+ $(C)t_mddriver$(EXEEXT) -x
+
+clean::
+ $(RM) t_mddriver$(EXEEXT) t_mddriver.$(OBJEXT)
+# config/post.in
+# put all:: first just in case no other rules occur here
+#
+all::
+
+check::
+
+clean:: clean-$(WHAT)
+ $(RM) config.log pre.out post.out Makefile.out
+
+clean-unix::
+ if test -n "$(OBJS)" ; then $(RM) $(OBJS); else :; fi
+
+clean-windows::
+ $(RM) *.$(OBJEXT)
+ $(RM) msvc.pdb *.err
diff --git a/testsuite/mac-mf.sed b/testsuite/mac-mf.sed
new file mode 100644
index 0000000..9b08e60
--- /dev/null
+++ b/testsuite/mac-mf.sed
@@ -0,0 +1,154 @@
+# Rewrite default rules from .c.o: to .c.o: .c
+/^\./s/^\(\.[a-z]*\)\(\.[a-z]*\)\( *: *\)$/\1\2\3 \1/
+
+# Change dependency char.
+/::/s/::/ \\Option-f\\Option-f /g
+/:/s/:/ \\Option-f /g
+/^[SU]=/s/ \\Option-f /:/g
+
+# Change syntax of Makefile vars.
+/\$/s/\${\([a-zA-Z0-9_]*\)}/{\1}/g
+/\$/s/\$(\([a-zA-Z0-9_]*\))/{\1}/g
+
+# Change $@ to {targ}
+/\$@/s/\$@/{targ}/g
+
+# Change pathname syntax.
+#
+# If line ends with .. then assume it sets a variable that will
+# be used to prefix something else -- eliminate one colon, assuming
+# that a slash after the ${name} will turn into the missing colon.
+# Mac pathname conventions are IRREGULAR and UGLY!
+/\./s,\.\./\.\.$,::,
+/\./s,\.\.$,:,
+# Same if it ends with . (a single dot); turn it into nothing.
+/\./s,\.$,,g
+# Rules for .. and . elsewhere in the line
+# Convert ../: to ::, recur to get whole paths.
+/\./s,\.\./:,::,g
+# Convert ../../ to :::
+/\./s,\.\./\.\./,:::,g
+/\./s,\.\./,::,g
+/\.\//s,\./,:,g
+/\//s,/,:,g
+
+/=/s/ = \.$/ = :/
+
+# Comment out any explicit srcdir setting.
+# /srcdir/s/^srcdir/# srcdir/
+
+/version/s/^version=/# version=/
+
+/BASEDIR/s/^BASEDIR =.*$/BASEDIR = "{srcroot}"/
+/{BASEDIR}:/s/{BASEDIR}:/{BASEDIR}/g
+# The original lines screw up -I$(srcdir)/../des by eliminating a colon.
+# Proposed fix: Eliminate srcdir prefixes totally.
+#/{srcdir}:/s/{srcdir}:/"{srcdir}"/g
+/{srcdir}:/s/{srcdir}://g
+#/"{srcdir}":/s/"{srcdir}":/"{srcdir}"/g
+
+# Comment out settings of anything set by mpw host config.
+##/CC/s/^CC *=/#CC =/
+##/CFLAGS/s/^CFLAGS *=/#CFLAGS =/
+##/LDFLAGS/s/^LDFLAGS *=/#LDFLAGS =/
+
+# Change -I usage.
+/-I/s/-I\./-i :/g
+/-I/s/-I::bfd/-i ::bfd:/g
+/-I/s/-I::include/-i ::include:/g
+/-I/s/-I/-i /g
+
+# Change -D usage.
+/-D/s/\([ =]\)-D\([^ ]*\)/\1-d \2/g
+
+# Change continuation char.
+/\\$/s/\\$/\\Option-d/
+
+# Change wildcard char.
+/^[^#]/s/\*/\\Option-x/g
+
+# Change path of various types of source files.
+#/\.[chly]/s/\([ ><=]\)\([-a-zA-Z0-9_$:"]*\)\.\([chly]\)/\1"{s}"\2.\3/g
+#/\.[chly]/s/^\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/"{s}"\1.\2/g
+# Skip the {s} and {o} business for now...
+# Fix some overenthusiasms.
+#/{s}/s/"{s}""{srcdir}"/"{srcdir}"/g
+#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)dir}/"{\1dir}"/g
+#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)DIR}/"{\1DIR}"/g
+#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)dir}"/"{\1dir}"/g
+#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)DIR}"/"{\1DIR}"/g
+#/{s}/s/"{s}":/:/g
+#/{s}/s/^"{s}"//g
+#/^\./s/"{s}"\././g
+
+# Change extension and path of objects, except in the OBJEXT line.
+#/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1"{o}"\2.c.o/g
+#/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/"{o}"\1.c.o/g
+# Skip the {o} stuff for now...
+/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1\2.c.o/g
+/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/\1.c.o/g
+# Clean up.
+#/\.o/s/"{o}""{o}"/"{o}"/g
+#/{o}/s/^"{o}"\([a-zA-Z0-9_]*\)=/\1=/g
+
+# Change extension of libs.
+# /\.a/s/lib\([a-z]*\)\.a/lib\1.o/g
+
+# Remove non-echo option.
+/^ -/s/^ -/ /
+
+# Change cp to duplicate.
+# /cp/s/^\([ ]*\)cp /\1Duplicate -d -y /
+# Change mv to rename.
+# /mv/s/^\([ ]*\)mv /\1Rename -y /
+# /Rename/s/^\([ ]*\)Rename -y -f/\1Rename -y/
+# Change rm to delete.
+/^RM=/s/rm -f/Delete -i -y/
+# /rm/s/^\([ ]*\)rm /\1Delete -y /
+# /Delete/s/^\([ ]*\)Delete -y -f/\1Delete -y/
+# Comment out symlinking.
+# /ln/s/^\([ ]*\)ln /\1# ln /
+
+# Remove -c from explicit compiler calls.
+# /-c/s/{CC}\(.*\) -c \(.*\)\([-a-z]*\)\.c/{CC}\1 \2\3.c -o "{o}"\3.c.o/g
+# Don't ask... prev subst seems to omit the second filename.
+# /-o/s/\([-a-z]*\)\.c -o "{o}".c.o/\1\.c -o "{o}"\1.c.o/
+
+# Change linking cc to link.
+/LDFLAGS/ s/{CC} \(.*\){CFLAGS}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/
+/CFLAGS_LINK/s/{CC} \(.*\){CFLAGS_LINK}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/
+
+# Comment out .PHONY rules.
+/\.PHONY/s/^\.PHONY/# \.PHONY/
+# Comment out .SUFFIXES rules.
+/\.SUFFIXES/s/^\.SUFFIXES/# \.SUFFIXES/
+# Comment out .PRECIOUS rules.
+/\.PRECIOUS/s/^\.PRECIOUS/# \.PRECIOUS/
+## Comment out default rules.
+##/^\./s/^\(\.[a-z]*\.[a-z]* \)/# \1/
+
+#
+# End of original hack-mf.sed
+#
+# Begin original hack-mf2.sed
+#
+# Transform expressions.
+
+# Set the install program appropriate.
+# /INSTALL/s/^INSTALL *= *`.*`:install.sh -c/INSTALL = Duplicate -y/
+
+# Include from the extra-include dir.
+# /^INCLUDES = /s/^INCLUDES = /INCLUDES = -i "{srcroot}"extra-include /
+
+# Yuck - remove unconverted autoconf things.
+# /@/s/@[^ ]*@//g
+
+# Hackery, pure and simple
+# To speed up compiles, remove duplicated -i options.
+/-i/s/\(-i [^ ]*\) \1 /\1 /g
+
+# Note! There are 8-bit characters in the three lines below:
+# 0xc4, 0xb6, 0xc5.
+/Option/s/\\Option-f/Ä/g
+/Option/s/\\Option-d/¶/g
+/Option/s/\\Option-x/Å/g
diff --git a/testsuite/madding.good b/testsuite/madding.good
new file mode 100644
index 0000000..537ab50
--- /dev/null
+++ b/testsuite/madding.good
@@ -0,0 +1 @@
+The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude."
diff --git a/testsuite/madding.inp b/testsuite/madding.inp
new file mode 100644
index 0000000..2367bc8
--- /dev/null
+++ b/testsuite/madding.inp
@@ -0,0 +1 @@
+The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity."
diff --git a/testsuite/madding.sed b/testsuite/madding.sed
new file mode 100644
index 0000000..5494f2b
--- /dev/null
+++ b/testsuite/madding.sed
@@ -0,0 +1,8 @@
+# this is from Thomas Hardy's _Far From the Madding Crowd_.
+#
+# cf ftp://ftp.cdrom.com/pub/gutenberg/etext94/crowd10a.txt
+#
+# the point of this test, in case it isn't obvious, is to overfill fixed
+# buffers wherever they might be.
+#
+s/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity."/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. 
The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. 
She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. 
Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude."/
diff --git a/testsuite/manis.good b/testsuite/manis.good
new file mode 100644
index 0000000..f349b76
--- /dev/null
+++ b/testsuite/manis.good
@@ -0,0 +1,22 @@
+s%@CFLAGS@%%g
+s%@CPPFLAGS@%-I/%g
+s%@CXXFLAGS@%-x c++%g
+s%@DEFS@%$DEFS%g
+s%@LDFLAGS@%-L/usr/lib%g
+s%@LIBS@%-lgnu -lbfd%g
+s%@exec_prefix@%%g
+s%@prefix@%$prefix%g
+s%@RANLIB@%$RANLIB%g
+s%@CC@%/usr/local/bin/gcc%g
+s%@CPP@%$CPP%g
+s%@XCFLAGS@%$XCFLAGS%g
+s%@XINCLUDES@%$XINCLUDES%g
+s%@XLIBS@%$XLIBS%g
+s%@XPROGS@%$XPROGS%g
+s%@TCLHDIR@%$TCLHDIR%g
+s%@TCLLIB@%$TCLLIB%g
+s%@TKHDIR@%$TKHDIR%g
+s%@TKLIB@%$TKLIB%g
+s%@PTY_TYPE@%$PTY_TYPE%g
+s%@EVENT_TYPE@%$EVENT_TYPE%g
+s%@SETUID@%$SETUID%g
diff --git a/testsuite/manis.inp b/testsuite/manis.inp
new file mode 100644
index 0000000..f349b76
--- /dev/null
+++ b/testsuite/manis.inp
@@ -0,0 +1,22 @@
+s%@CFLAGS@%%g
+s%@CPPFLAGS@%-I/%g
+s%@CXXFLAGS@%-x c++%g
+s%@DEFS@%$DEFS%g
+s%@LDFLAGS@%-L/usr/lib%g
+s%@LIBS@%-lgnu -lbfd%g
+s%@exec_prefix@%%g
+s%@prefix@%$prefix%g
+s%@RANLIB@%$RANLIB%g
+s%@CC@%/usr/local/bin/gcc%g
+s%@CPP@%$CPP%g
+s%@XCFLAGS@%$XCFLAGS%g
+s%@XINCLUDES@%$XINCLUDES%g
+s%@XLIBS@%$XLIBS%g
+s%@XPROGS@%$XPROGS%g
+s%@TCLHDIR@%$TCLHDIR%g
+s%@TCLLIB@%$TCLLIB%g
+s%@TKHDIR@%$TKHDIR%g
+s%@TKLIB@%$TKLIB%g
+s%@PTY_TYPE@%$PTY_TYPE%g
+s%@EVENT_TYPE@%$EVENT_TYPE%g
+s%@SETUID@%$SETUID%g
diff --git a/testsuite/manis.sed b/testsuite/manis.sed
new file mode 100644
index 0000000..5017845
--- /dev/null
+++ b/testsuite/manis.sed
@@ -0,0 +1,6 @@
+# straight out of an autoconf-generated configure.
+# The output should look just like the input after this is run.
+#
+# Protect against being on the right side of a sed subst in config.status.
+s/%@/@@/; s/@%/@@/; s/%g$/@g/; /@g$/s/[\\\\&%]/\\\\&/g;
+ s/@@/%@/; s/@@/@%/; s/@g$/%g/
diff --git a/testsuite/middle.good b/testsuite/middle.good
new file mode 100644
index 0000000..71f33c1
--- /dev/null
+++ b/testsuite/middle.good
@@ -0,0 +1,3 @@
+ universe. You will never succeed. Everything will fail and come
+ to an end finally. But you have a chance to carve a little bit
+ of order and maybe even beauty out of the raw materials that
diff --git a/testsuite/middle.inp b/testsuite/middle.inp
new file mode 100644
index 0000000..5c4b4a4
--- /dev/null
+++ b/testsuite/middle.inp
@@ -0,0 +1,9 @@
+ "...by imposing a tiny bit of order in a communication you are
+ translating, you are carving out a little bit of order in the
+ universe. You will never succeed. Everything will fail and come
+ to an end finally. But you have a chance to carve a little bit
+ of order and maybe even beauty out of the raw materials that
+ surround you everywhere, and I think there is no greater meaning
+ in life."
+
+ Donald L. Philippi, Oct 1930 - Jan 1993
diff --git a/testsuite/middle.sed b/testsuite/middle.sed
new file mode 100644
index 0000000..3471789
--- /dev/null
+++ b/testsuite/middle.sed
@@ -0,0 +1 @@
+3,5p
diff --git a/testsuite/newjis.good b/testsuite/newjis.good
new file mode 100644
index 0000000..4de16b0
--- /dev/null
+++ b/testsuite/newjis.good
@@ -0,0 +1,4 @@
+$B$H$J$j$NM9JX6I$K(B
+$B$?$F$+$1$?$N$O(B
+$B$?$F$+$1$?$+$C$?$+$i(B
+$B$?$F$+$1$?!#(B
diff --git a/testsuite/newjis.inp b/testsuite/newjis.inp
new file mode 100644
index 0000000..fc710f6
--- /dev/null
+++ b/testsuite/newjis.inp
@@ -0,0 +1,4 @@
+$B$H$J$j$N$?$1$,$-$K(B
+$B$?$F$+$1$?$N$O(B
+$B$?$F$+$1$?$+$C$?$+$i(B
+$B$?$F$+$1$?!#(B
diff --git a/testsuite/newjis.sed b/testsuite/newjis.sed
new file mode 100644
index 0000000..1bc941d
--- /dev/null
+++ b/testsuite/newjis.sed
@@ -0,0 +1 @@
+s/$?$1$,$-/M9JX6I/
diff --git a/testsuite/noeol.good b/testsuite/noeol.good
new file mode 100644
index 0000000..fa5fc0e
--- /dev/null
+++ b/testsuite/noeol.good
@@ -0,0 +1,3 @@
+This file is uniquewakuwaku
+in that it doeswakuwaku
+end in a newline.wakuwaku \ No newline at end of file
diff --git a/testsuite/noeol.inp b/testsuite/noeol.inp
new file mode 100644
index 0000000..c4cf6a1
--- /dev/null
+++ b/testsuite/noeol.inp
@@ -0,0 +1,3 @@
+This file is unique
+in that it does
+end in a newline. \ No newline at end of file
diff --git a/testsuite/noeol.sed b/testsuite/noeol.sed
new file mode 100644
index 0000000..bea7110
--- /dev/null
+++ b/testsuite/noeol.sed
@@ -0,0 +1 @@
+s/$/wakuwaku/g
diff --git a/testsuite/noeolw.1good b/testsuite/noeolw.1good
new file mode 100644
index 0000000..f0f44d9
--- /dev/null
+++ b/testsuite/noeolw.1good
@@ -0,0 +1,7 @@
+This file is unique
+in that it does
+end in a newline.
+This file is unique
+in that it does
+end in a newline.
+in that it does
diff --git a/testsuite/noeolw.2good b/testsuite/noeolw.2good
new file mode 100644
index 0000000..c4cf6a1
--- /dev/null
+++ b/testsuite/noeolw.2good
@@ -0,0 +1,3 @@
+This file is unique
+in that it does
+end in a newline. \ No newline at end of file
diff --git a/testsuite/noeolw.good b/testsuite/noeolw.good
new file mode 100644
index 0000000..e76509a
--- /dev/null
+++ b/testsuite/noeolw.good
@@ -0,0 +1,12 @@
+This file is unique
+This file is unique
+in that it does
+in that it does
+end in a newline.
+end in a newline.
+This file is unique
+This file is unique
+in that it does
+in that it does
+end in a newline.
+end in a newline. \ No newline at end of file
diff --git a/testsuite/noeolw.sed b/testsuite/noeolw.sed
new file mode 100644
index 0000000..0924619
--- /dev/null
+++ b/testsuite/noeolw.sed
@@ -0,0 +1,10 @@
+w noeolw.1out
+$ {
+ x
+ w noeolw.1out
+ x
+}
+h
+1,3w noeolw.2out
+p
+p
diff --git a/testsuite/numsub.good b/testsuite/numsub.good
new file mode 100644
index 0000000..9bdaaef
--- /dev/null
+++ b/testsuite/numsub.good
@@ -0,0 +1 @@
+foo foo fo oo f oo foo foo foo foo foo foo foo bar foo foo foo foo foo
diff --git a/testsuite/numsub.inp b/testsuite/numsub.inp
new file mode 100644
index 0000000..6924c98
--- /dev/null
+++ b/testsuite/numsub.inp
@@ -0,0 +1,2 @@
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
diff --git a/testsuite/numsub.sed b/testsuite/numsub.sed
new file mode 100644
index 0000000..4a96cad
--- /dev/null
+++ b/testsuite/numsub.sed
@@ -0,0 +1,7 @@
+# the first one matches, the second doesn't
+1s/foo/bar/10
+2s/foo/bar/20
+
+# The second line should be deleted. ssed 3.55-3.58 do not delete it.
+t
+d
diff --git a/testsuite/numsub2.good b/testsuite/numsub2.good
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/testsuite/numsub2.good
diff --git a/testsuite/numsub2.inp b/testsuite/numsub2.inp
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/testsuite/numsub2.inp
@@ -0,0 +1 @@
+
diff --git a/testsuite/numsub2.sed b/testsuite/numsub2.sed
new file mode 100644
index 0000000..dddead9
--- /dev/null
+++ b/testsuite/numsub2.sed
@@ -0,0 +1 @@
+s/a*/b/2
diff --git a/testsuite/numsub3.good b/testsuite/numsub3.good
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/testsuite/numsub3.good
diff --git a/testsuite/numsub3.inp b/testsuite/numsub3.inp
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/testsuite/numsub3.inp
@@ -0,0 +1 @@
+
diff --git a/testsuite/numsub3.sed b/testsuite/numsub3.sed
new file mode 100644
index 0000000..0ea96a4
--- /dev/null
+++ b/testsuite/numsub3.sed
@@ -0,0 +1 @@
+s/^a*/b/2
diff --git a/testsuite/numsub4.good b/testsuite/numsub4.good
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/testsuite/numsub4.good
diff --git a/testsuite/numsub4.inp b/testsuite/numsub4.inp
new file mode 100644
index 0000000..b680253
--- /dev/null
+++ b/testsuite/numsub4.inp
@@ -0,0 +1 @@
+z
diff --git a/testsuite/numsub4.sed b/testsuite/numsub4.sed
new file mode 100644
index 0000000..e76c5bf
--- /dev/null
+++ b/testsuite/numsub4.sed
@@ -0,0 +1 @@
+s/^a*/b/2p
diff --git a/testsuite/numsub5.good b/testsuite/numsub5.good
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/testsuite/numsub5.good
diff --git a/testsuite/numsub5.inp b/testsuite/numsub5.inp
new file mode 100644
index 0000000..b680253
--- /dev/null
+++ b/testsuite/numsub5.inp
@@ -0,0 +1 @@
+z
diff --git a/testsuite/numsub5.sed b/testsuite/numsub5.sed
new file mode 100644
index 0000000..d8ecda2
--- /dev/null
+++ b/testsuite/numsub5.sed
@@ -0,0 +1 @@
+s/a*/b/3p
diff --git a/testsuite/ptestcases.h b/testsuite/ptestcases.h
new file mode 100644
index 0000000..506b1cc
--- /dev/null
+++ b/testsuite/ptestcases.h
@@ -0,0 +1,326 @@
+ { 0, 0, "2.8.2 Regular Expression General Requirement", NULL, },
+ { 2, 4, "bb*", "abbbc", },
+ { 2, 2, "bb*", "ababbbc", },
+ { 7, 9, "A#*::", "A:A#:qA::qA#::qA##::q", },
+ { 1, 5, "A#*::", "A##::A#::qA::qA#:q", },
+ { 0, 0, "2.8.3.1.2 BRE Special Characters", NULL, },
+ { 0, 0, "GA108", NULL, },
+ { 2, 2, "\\.", "a.c", },
+ { 2, 2, "\\[", "a[c", },
+ { 2, 2, "\\\\", "a\\c", },
+ { 2, 2, "\\*", "a*c", },
+ { 2, 2, "\\^", "a^c", },
+ { 2, 2, "\\$", "a$c", },
+ { 7, 11, "X\\*Y\\*8", "Y*8X*8X*Y*8", },
+ { 0, 0, "GA109", NULL, },
+ { 2, 2, "[.]", "a.c", },
+ { 2, 2, "[[]", "a[c", },
+ { -1, -1, "[[]", "ac", },
+ { 2, 2, "[\\]", "a\\c", },
+ { 1, 1, "[\\a]", "abc", },
+ { 2, 2, "[\\.]", "a\\.c", },
+ { 2, 2, "[\\.]", "a.\\c", },
+ { 2, 2, "[*]", "a*c", },
+ { 2, 2, "[$]", "a$c", },
+ { 2, 2, "[X*Y8]", "7*8YX", },
+ { 0, 0, "GA110", NULL, },
+ { 2, 2, "*", "a*c", },
+ { 3, 4, "*a", "*b*a*c", },
+ { 1, 5, "**9=", "***9=9", },
+ { 0, 0, "GA111", NULL, },
+ { 1, 1, "^*", "*bc", },
+ { -1, -1, "^*", "a*c", },
+ { -1, -1, "^*", "^*ab", },
+ { 1, 5, "^**9=", "***9=", },
+ { -1, -1, "^*5<*9", "5<9*5<*9", },
+ { 0, 0, "GA112", NULL, },
+ { 2, 3, "\\(*b\\)", "a*b", },
+ { -1, -1, "\\(*b\\)", "ac", },
+ { 1, 6, "A\\(**9\\)=", "A***9=79", },
+ { 0, 0, "GA113(1)", NULL, },
+ { 1, 3, "\\(^*ab\\)", "*ab", },
+ { -1, -1, "\\(^*ab\\)", "^*ab", },
+ { -1, -1, "\\(^*b\\)", "a*b", },
+ { -1, -1, "\\(^*b\\)", "^*b", },
+ { 0, 0, "GA114", NULL, },
+ { 1, 3, "a^b", "a^b", },
+ { 1, 3, "a\\^b", "a^b", },
+ { 1, 1, "^^", "^bc", },
+ { 2, 2, "\\^", "a^c", },
+ { 1, 1, "[c^b]", "^abc", },
+ { 1, 1, "[\\^ab]", "^ab", },
+ { 2, 2, "[\\^ab]", "c\\d", },
+ { -1, -1, "[^^]", "^", },
+ { 1, 3, "\\(a^b\\)", "a^b", },
+ { 1, 3, "\\(a\\^b\\)", "a^b", },
+ { 2, 2, "\\(\\^\\)", "a^b", },
+ { 0, 0, "GA115", NULL, },
+ { 3, 3, "$$", "ab$", },
+ { -1, -1, "$$", "$ab", },
+ { 2, 3, "$c", "a$c", },
+ { 2, 2, "[$]", "a$c", },
+ { 1, 2, "\\$a", "$a", },
+ { 3, 3, "\\$$", "ab$", },
+ { 2, 6, "A\\([34]$[34]\\)B", "XA4$3BY", },
+ { 0, 0, "2.8.3.1.3 Periods in BREs", NULL, },
+ { 0, 0, "GA116", NULL, },
+ { 1, 1, ".", "abc", },
+ { -1, -1, ".ab", "abc", },
+ { 1, 3, "ab.", "abc", },
+ { 1, 3, "a.b", "a,b", },
+ { -1, -1, ".......", "PqRs6", },
+ { 1, 7, ".......", "PqRs6T8", },
+ { 0, 0, "2.8.3.2 RE Bracket Expression", NULL, },
+ { 0, 0, "GA118", NULL, },
+ { 2, 2, "[abc]", "xbyz", },
+ { -1, -1, "[abc]", "xyz", },
+ { 2, 2, "[abc]", "xbay", },
+ { 0, 0, "GA119", NULL, },
+ { 2, 2, "[^a]", "abc", },
+ { 4, 4, "[^]cd]", "cd]ef", },
+ { 2, 2, "[^abc]", "axyz", },
+ { -1, -1, "[^abc]", "abc", },
+ { 3, 3, "[^[.a.]b]", "abc", },
+ { 3, 3, "[^[=a=]b]", "abc", },
+ { 2, 2, "[^-ac]", "abcde-", },
+ { 2, 2, "[^ac-]", "abcde-", },
+ { 3, 3, "[^a-b]", "abcde", },
+ { 3, 3, "[^a-bd-e]", "dec", },
+ { 2, 2, "[^---]", "-ab", },
+ { 16, 16, "[^a-zA-Z0-9]", "pqrstVWXYZ23579#", },
+ { 0, 0, "GA120(1)", NULL, },
+ { 3, 3, "[]a]", "cd]ef", },
+ { 1, 1, "[]-a]", "a_b", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 1, 1, "[]^a-z]", "string", },
+ { 0, 0, "GA120(2)", NULL, },
+ { 4, 4, "[^]cd]", "cd]ef", },
+ { 0, 0, "[^]]*", "]]]]]]]]X", },
+ { 0, 0, "[^]]*", "]]]]]]]]", },
+ { 9, 9, "[^]]\\{1,\\}", "]]]]]]]]X", },
+ { -1, -1, "[^]]\\{1,\\}", "]]]]]]]]", },
+ { 0, 0, "GA120(3)", NULL, },
+ { 3, 3, "[c[.].]d]", "ab]cd", },
+ { 2, 8, "[a-z]*[[.].]][A-Z]*", "Abcd]DEFg", },
+ { 0, 0, "GA121", NULL, },
+ { 2, 2, "[[.a.]b]", "Abc", },
+ { 1, 1, "[[.a.]b]", "aBc", },
+ { -1, -1, "[[.a.]b]", "ABc", },
+ { 3, 3, "[^[.a.]b]", "abc", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 3, 3, "[A-[.].]c]", "ab]!", },
+ { 0, 0, "GA122", NULL, },
+ { -2, -2, "[[.ch.]]", "abc", },
+ { -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
+ { 0, 0, "GA125", NULL, },
+ { 2, 2, "[[=a=]b]", "Abc", },
+ { 1, 1, "[[=a=]b]", "aBc", },
+ { -1, -1, "[[=a=]b]", "ABc", },
+ { 3, 3, "[^[=a=]b]", "abc", },
+ { 0, 0, "GA126", NULL, },
+ { 0, 0, NULL, "the expected result for [[:alnum:]]* is 2-7 which is wrong" },
+ { 0, 0, "[[:alnum:]]*", " aB28gH", },
+ { 2, 7, "[[:alnum:]][[:alnum:]]*", " aB28gH", },
+ { 0, 0, NULL, "the expected result for [^[:alnum:]]* is 2-5 which is wrong" },
+ { 0, 0, "[^[:alnum:]]*", "2 ,a", },
+ { 2, 5, "[^[:alnum:]][^[:alnum:]]*", "2 ,a", },
+ { 0, 0, NULL, "the expected result for [[:alpha:]]* is 2-5 which is wrong" },
+ { 0, 0, "[[:alpha:]]*", " aBgH2", },
+ { 2, 5, "[[:alpha:]][[:alpha:]]*", " aBgH2", },
+ { 1, 6, "[^[:alpha:]]*", "2 8,a", },
+ { 1, 2, "[[:blank:]]*", " \r", },
+ { 1, 8, "[^[:blank:]]*", "aB28gH, ", },
+ { 1, 2, "[[:cntrl:]]*", "  ", },
+ { 1, 8, "[^[:cntrl:]]*", "aB2 8gh,", },
+ { 0, 0, NULL, "the expected result for [[:digit:]]* is 2-3 which is wrong" },
+ { 0, 0, "[[:digit:]]*", "a28", },
+ { 2, 3, "[[:digit:]][[:digit:]]*", "a28", },
+ { 1, 8, "[^[:digit:]]*", "aB gH,", },
+ { 1, 7, "[[:graph:]]*", "aB28gH, ", },
+ { 1, 3, "[^[:graph:]]*", " ,", },
+ { 1, 2, "[[:lower:]]*", "agB", },
+ { 1, 8, "[^[:lower:]]*", "B2 8H,a", },
+ { 1, 8, "[[:print:]]*", "aB2 8gH, ", },
+ { 1, 2, "[^[:print:]]*", "  ", },
+ { 0, 0, NULL, "the expected result for [[:punct:]]* is 2-2 which is wrong" },
+ { 0, 0, "[[:punct:]]*", "a,2", },
+ { 2, 3, "[[:punct:]][[:punct:]]*", "a,,2", },
+ { 1, 9, "[^[:punct:]]*", "aB2 8gH", },
+ { 1, 3, "[[:space:]]*", " \r", },
+ { 0, 0, NULL, "the expected result for [^[:space:]]* is 2-9 which is wrong" },
+ { 0, 0, "[^[:space:]]*", " aB28gH, ", },
+ { 2, 9, "[^[:space:]][^[:space:]]*", " aB28gH, ", },
+ { 0, 0, NULL, "the expected result for [[:upper:]]* is 2-3 which is wrong" },
+ { 0, 0, "[[:upper:]]*", "aBH2", },
+ { 2, 3, "[[:upper:]][[:upper:]]*", "aBH2", },
+ { 1, 8, "[^[:upper:]]*", "a2 8g,B", },
+ { 0, 0, NULL, "the expected result for [[:xdigit:]]* is 2-5 which is wrong" },
+ { 0, 0, "[[:xdigit:]]*", "gaB28h", },
+ { 2, 5, "[[:xdigit:]][[:xdigit:]]*", "gaB28h", },
+ { 0, 0, NULL, "the expected result for [^[:xdigit:]]* is 2-7 which is wrong" },
+ { 2, 7, "[^[:xdigit:]][^[:xdigit:]]*", "a gH,2", },
+ { 0, 0, "GA127", NULL, },
+ { -2, -2, "[b-a]", "abc", },
+ { 1, 1, "[a-c]", "bbccde", },
+ { 2, 2, "[a-b]", "-bc", },
+ { 3, 3, "[a-z0-9]", "AB0", },
+ { 3, 3, "[^a-b]", "abcde", },
+ { 3, 3, "[^a-bd-e]", "dec", },
+ { 1, 1, "[]-a]", "a_b", },
+ { 2, 2, "[+--]", "a,b", },
+ { 2, 2, "[--/]", "a.b", },
+ { 2, 2, "[^---]", "-ab", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 3, 3, "[A-[.].]c]", "ab]!", },
+ { 2, 6, "bc[d-w]xy", "abchxyz", },
+ { 0, 0, "GA129", NULL, },
+ { 1, 1, "[a-cd-f]", "dbccde", },
+ { -1, -1, "[a-ce-f]", "dBCCdE", },
+ { 2, 4, "b[n-zA-M]Y", "absY9Z", },
+ { 2, 4, "b[n-zA-M]Y", "abGY9Z", },
+ { 0, 0, "GA130", NULL, },
+ { 3, 3, "[-xy]", "ac-", },
+ { 2, 4, "c[-xy]D", "ac-D+", },
+ { 2, 2, "[--/]", "a.b", },
+ { 2, 4, "c[--/]D", "ac.D+b", },
+ { 2, 2, "[^-ac]", "abcde-", },
+ { 1, 3, "a[^-ac]c", "abcde-", },
+ { 3, 3, "[xy-]", "zc-", },
+ { 2, 4, "c[xy-]7", "zc-786", },
+ { 2, 2, "[^ac-]", "abcde-", },
+ { 2, 4, "a[^ac-]c", "5abcde-", },
+ { 2, 2, "[+--]", "a,b", },
+ { 2, 4, "a[+--]B", "Xa,By", },
+ { 2, 2, "[^---]", "-ab", },
+ { 4, 6, "X[^---]Y", "X-YXaYXbY", },
+ { 0, 0, "2.8.3.3 BREs Matching Multiple Characters", NULL, },
+ { 0, 0, "GA131", NULL, },
+ { 3, 4, "cd", "abcdeabcde", },
+ { 1, 2, "ag*b", "abcde", },
+ { -1, -1, "[a-c][e-f]", "abcdef", },
+ { 3, 4, "[a-c][e-f]", "acbedf", },
+ { 4, 8, "abc*XYZ", "890abXYZ#*", },
+ { 4, 9, "abc*XYZ", "890abcXYZ#*", },
+ { 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
+ { -1, -1, "abc*XYZ", "890abc*XYZ#*", },
+ { 0, 0, "GA132", NULL, },
+ { 2, 4, "\\(*bc\\)", "a*bc", },
+ { 1, 2, "\\(ab\\)", "abcde", },
+ { 1, 10, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { 3, 8, "43\\(2\\(6\\)*0\\)AB", "654320ABCD", },
+ { 3, 9, "43\\(2\\(7\\)*0\\)AB", "6543270ABCD", },
+ { 3, 12, "43\\(2\\(7\\)*0\\)AB", "6543277770ABCD", },
+ { 0, 0, "GA133", NULL, },
+ { 1, 10, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { -1, -1, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(k\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { 0, 0, "GA134", NULL, },
+ { 2, 4, "\\(bb*\\)", "abbbc", },
+ { 2, 2, "\\(bb*\\)", "ababbbc", },
+ { 1, 6, "a\\(.*b\\)", "ababbbc", },
+ { 1, 2, "a\\(b*\\)", "ababbbc", },
+ { 1, 20, "a\\(.*b\\)c", "axcaxbbbcsxbbbbbbbbc", },
+ { 0, 0, "GA135", NULL, },
+ { 1, 7, "\\(a\\(b\\(c\\(d\\(e\\)\\)\\)\\)\\)\\4", "abcdededede", },
+ { 0, 0, NULL, "POSIX does not really specify whether a\\(b\\)*c\\1 matches acb." },
+ { 0, 0, NULL, "back references are supposed to expand to the last match, but what" },
+ { 0, 0, NULL, "if there never was a match as in this case?" },
+ { -1, -1, "a\\(b\\)*c\\1", "acb", },
+ { 1, 11, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)\\9", "abcdefghijjk", },
+ { 0, 0, "GA136", NULL, },
+ { 0, 0, NULL, "These two tests have the same problem as the test in GA135. No match" },
+ { 0, 0, NULL, "of a subexpression, why should the back reference be usable?" },
+ { 0, 0, NULL, "1 2 a\\(b\\)*c\\1 acb" },
+ { 0, 0, NULL, "4 7 a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4¦xYzabcdePQRST" },
+ { -1, -1, "a\\(b\\)*c\\1", "acb", },
+ { -1, -1, "a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4", "xYzabcdePQRST", },
+ { 0, 0, "GA137", NULL, },
+ { -2, -2, "\\(a\\(b\\)\\)\\3", "foo", },
+ { -2, -2, "\\(a\\(b\\)\\)\\(a\\(b\\)\\)\\5", "foo", },
+ { 0, 0, "GA138", NULL, },
+ { 1, 2, "ag*b", "abcde", },
+ { 1, 10, "a.*b", "abababvbabc", },
+ { 2, 5, "b*c", "abbbcdeabbbbbbcde", },
+ { 2, 5, "bbb*c", "abbbcdeabbbbbbcde", },
+ { 1, 5, "a\\(b\\)*c\\1", "abbcbbb", },
+ { -1, -1, "a\\(b\\)*c\\1", "abbdbd", },
+ { 0, 0, "\\([a-c]*\\)\\1", "abcacdef", },
+ { 1, 6, "\\([a-c]*\\)\\1", "abcabcabcd", },
+ { 1, 2, "a^*b", "ab", },
+ { 1, 5, "a^*b", "a^^^b", },
+ { 0, 0, "GA139", NULL, },
+ { 1, 2, "a\\{2\\}", "aaaa", },
+ { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", },
+ { 1, 2, "\\(a\\)\\1\\{1,2\\}", "aabc", },
+ { 1, 3, "\\(a\\)\\1\\{1,2\\}", "aaaabc", },
+ { 0, 0, NULL, "the expression \\(\\(a\\)\\1\\)\\{1,2\\} is ill-formed, using \\2" },
+ { 1, 4, "\\(\\(a\\)\\2\\)\\{1,2\\}", "aaaabc", },
+ { 0, 0, "GA140", NULL, },
+ { 1, 2, "a\\{2\\}", "aaaa", },
+ { -1, -1, "a\\{2\\}", "abcd", },
+ { 0, 0, "a\\{0\\}", "aaaa", },
+ { 1, 64, "a\\{64\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "GA141", NULL, },
+ { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", },
+ { 0, 0, NULL, "the expected result for \\([a-c]*\\)\\{2,\\} is failure which isn't correct" },
+ { 1, 3, "\\([a-c]*\\)\\{2,\\}", "abcdefg", },
+ { 1, 3, "\\([a-c]*\\)\\{1,\\}", "abcdefg", },
+ { -1, -1, "a\\{64,\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "GA142", NULL, },
+ { 1, 3, "a\\{2,3\\}", "aaaa", },
+ { -1, -1, "a\\{2,3\\}", "abcd", },
+ { 0, 0, "\\([a-c]*\\)\\{0,0\\}", "foo", },
+ { 1, 63, "a\\{1,63\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "2.8.3.4 BRE Precedence", NULL, },
+ { 0, 0, "GA143", NULL, },
+ { 0, 0, NULL, "There are numerous bugs in the original version." },
+ { 2, 19, "\\^\\[[[.].]]\\\\(\\\\1\\\\)\\*\\\\{1,2\\\\}\\$", "a^[]\\(\\1\\)*\\{1,2\\}$b", },
+ { 1, 6, "[[=*=]][[=\\=]][[=]=]][[===]][[...]][[:punct:]]", "*\\]=.;", },
+ { 1, 6, "[$\\(*\\)^]*", "$\\()*^", },
+ { 1, 1, "[\\1]", "1", },
+ { 1, 1, "[\\{1,2\\}]", "{", },
+ { 0, 0, NULL, "the expected result for \\(*\\)*\\1* is 2-2 which isn't correct" },
+ { 0, 0, "\\(*\\)*\\1*", "a*b*11", },
+ { 2, 3, "\\(*\\)*\\1*b", "a*b*11", },
+ { 0, 0, NULL, "the expected result for \\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\) is 1-5 which isn't correct" },
+ { 1, 3, "\\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\)", "abbab", },
+ { 1, 5, "\\(a\\(b\\{1,2\\}\\)\\)\\{1,2\\}", "abbab", },
+ { 1, 1, "^\\(^\\(^a$\\)$\\)$", "a", },
+ { 1, 2, "\\(a\\)\\1$", "aa", },
+ { 1, 3, "ab*", "abb", },
+ { 1, 4, "ab\\{2,4\\}", "abbbc", },
+ { 0, 0, "2.8.3.5 BRE Expression Anchoring", NULL, },
+ { 0, 0, "GA144", NULL, },
+ { 1, 1, "^a", "abc", },
+ { -1, -1, "^b", "abc", },
+ { -1, -1, "^[a-zA-Z]", "99Nine", },
+ { 1, 4, "^[a-zA-Z]*", "Nine99", },
+ { 0, 0, "GA145(1)", NULL, },
+ { 1, 2, "\\(^a\\)\\1", "aabc", },
+ { -1, -1, "\\(^a\\)\\1", "^a^abc", },
+ { 1, 2, "\\(^^a\\)", "^a", },
+ { 1, 1, "\\(^^\\)", "^^", },
+ { 1, 3, "\\(^abc\\)", "abcdef", },
+ { -1, -1, "\\(^def\\)", "abcdef", },
+ { 0, 0, "GA146", NULL, },
+ { 3, 3, "a$", "cba", },
+ { -1, -1, "a$", "abc", },
+ { 5, 7, "[a-z]*$", "99ZZxyz", },
+ { 0, 0, NULL, "the expected result for [a-z]*$ is failure which isn't correct" },
+ { 10, 9, "[a-z]*$", "99ZZxyz99", },
+ { 3, 3, "$$", "ab$", },
+ { -1, -1, "$$", "$ab", },
+ { 3, 3, "\\$$", "ab$", },
+ { 0, 0, "GA147(1)", NULL, },
+ { -1, -1, "\\(a$\\)\\1", "bcaa", },
+ { -1, -1, "\\(a$\\)\\1", "ba$", },
+ { -1, -1, "\\(ab$\\)", "ab$", },
+ { 1, 2, "\\(ab$\\)", "ab", },
+ { 4, 6, "\\(def$\\)", "abcdef", },
+ { -1, -1, "\\(abc$\\)", "abcdef", },
+ { 0, 0, "GA148", NULL, },
+ { 0, 0, "^$", "", },
+ { 1, 3, "^abc$", "abc", },
+ { -1, -1, "^xyz$", "^xyz^", },
+ { -1, -1, "^234$", "^234$", },
+ { 1, 9, "^[a-zA-Z0-9]*$", "2aA3bB9zZ", },
+ { -1, -1, "^[a-z0-9]*$", "2aA3b#B9zZ", },
diff --git a/testsuite/readin.good b/testsuite/readin.good
new file mode 100644
index 0000000..a3ba1b6
--- /dev/null
+++ b/testsuite/readin.good
@@ -0,0 +1,21 @@
+``Democracy will not come today, this year,
+ nor ever through compromise and fear.
+MOO
+ I have as much right as the other fellow has
+ to stand on my two feet and own the land.
+MOO
+ I tire so of hearing people say
+ let things take their course,
+ tomorrow is another day.
+MOO
+ I do not need my freedom when I'm dead.
+MOO
+ I cannot live on tomorrow's bread.
+MOO
+ Freedom is a strong seed
+ planted in a great need.
+MOO
+ I live here, too.
+MOO
+ I want freedom just as you.''
+ ``The Weary Blues'', Langston Hughes
diff --git a/testsuite/readin.in2 b/testsuite/readin.in2
new file mode 100644
index 0000000..fa93196
--- /dev/null
+++ b/testsuite/readin.in2
@@ -0,0 +1 @@
+MOO
diff --git a/testsuite/readin.inp b/testsuite/readin.inp
new file mode 100644
index 0000000..95fb969
--- /dev/null
+++ b/testsuite/readin.inp
@@ -0,0 +1,14 @@
+``Democracy will not come today, this year,
+ nor ever through compromise and fear.
+ I have as much right as the other fellow has
+ to stand on my two feet and own the land.
+ I tire so of hearing people say
+ let things take their course,
+ tomorrow is another day.
+ I do not need my freedom when I'm dead.
+ I cannot live on tomorrow's bread.
+ Freedom is a strong seed
+ planted in a great need.
+ I live here, too.
+ I want freedom just as you.''
+ ``The Weary Blues'', Langston Hughes
diff --git a/testsuite/readin.sed b/testsuite/readin.sed
new file mode 100644
index 0000000..adcc6bc
--- /dev/null
+++ b/testsuite/readin.sed
@@ -0,0 +1 @@
+/\.$/r readin.in2
diff --git a/testsuite/recall.good b/testsuite/recall.good
new file mode 100644
index 0000000..230cc08
--- /dev/null
+++ b/testsuite/recall.good
@@ -0,0 +1,7 @@
+eeefff
+Xeefff
+XYefff
+XYeYff
+XYeYYf
+XYeYYY
+XYeYYY
diff --git a/testsuite/recall.inp b/testsuite/recall.inp
new file mode 100644
index 0000000..ef34b7e
--- /dev/null
+++ b/testsuite/recall.inp
@@ -0,0 +1 @@
+eeefff
diff --git a/testsuite/recall.sed b/testsuite/recall.sed
new file mode 100644
index 0000000..c1d7f9c
--- /dev/null
+++ b/testsuite/recall.sed
@@ -0,0 +1,7 @@
+# Check that the empty regex recalls the last *executed* regex,
+# not the last *compiled* regex
+# Print the untouched input line first.
+p
+# After this command the most recently executed regex is /e/.
+s/e/X/p
+:x
+# Empty regex: on the first pass it reuses /e/ from the command above;
+# after branching back it reuses /f/, the regex of the address below,
+# because that is the one executed most recently.
+s//Y/p
+# Loop back to x for as long as the line still contains an `f'.
+/f/bx
diff --git a/testsuite/runptests.c b/testsuite/runptests.c
new file mode 100644
index 0000000..4d43180
--- /dev/null
+++ b/testsuite/runptests.c
@@ -0,0 +1,123 @@
+/* POSIX regex testsuite from IEEE 2003.2.
+ Copyright (C) 1998, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Data structure to describe the tests. */
+/* One table row. main() treats a row whose `str' is NULL as a section
+ heading (title in `reg') and a row whose `reg' is NULL as a remark
+ (text in `str'); every other row is an actual regex test. */
+struct test
+{
+ /* Expected 1-based position of the first matched character. Special
+ values checked by main(): -1 means the string must not match, -2 means
+ the pattern must fail to compile, and 0 together with `end' == 0 means
+ an empty match at the very start of the string. */
+ int start;
+ /* Expected end of the match; compared directly with rm_eo. */
+ int end;
+ /* Pattern handed to regcomp(). */
+ const char *reg;
+ /* Subject string handed to regexec(). */
+ const char *str;
+ /* cflags argument for regcomp(). Rows that supply only four
+ initializers leave this member zero-initialized. */
+ int options;
+} tests[] =
+{
+#include "ptestcases.h"
+};
+
+
+/* Walk the tests[] table: print section headings and remarks, and for
+   every real row compile the pattern, run it against the subject string
+   and compare the overall match (match[0]) with the expected bounds.
+   Returns 0 when every row behaved as expected, 1 otherwise.  */
+int
+main (int argc, char *argv[])
+{
+  size_t cnt;
+  int errors = 0;
+
+  for (cnt = 0; cnt < sizeof (tests) / sizeof (tests[0]); ++cnt)
+    if (tests[cnt].str == NULL)
+      {
+        /* Section heading: the title, underlined with as many dashes as
+           the title is long.  */
+        printf ("\n%s\n%.*s\n", tests[cnt].reg,
+                (int) strlen (tests[cnt].reg),
+                "-----------------------------------------------------");
+      }
+    else if (tests[cnt].reg == NULL)
+      /* Remark row: only the text in `str' is printed.  */
+      printf ("!!! %s\n", tests[cnt].str);
+    else
+      {
+        regex_t re;
+        regmatch_t match[20];
+        int err;
+
+        printf ("regexp: \"%s\", string: \"%s\" -> ", tests[cnt].reg,
+                tests[cnt].str);
+
+        /* Compile the expression.  */
+        err = regcomp (&re, tests[cnt].reg, tests[cnt].options);
+        if (err != 0)
+          {
+            /* A start value of -2 marks patterns that must be rejected.  */
+            if (tests[cnt].start == -2)
+              puts ("compiling failed, OK");
+            else
+              {
+                char buf[100];
+                regerror (err, &re, buf, sizeof (buf));
+                printf ("FAIL: %s\n", buf);
+                ++errors;
+              }
+
+            /* Nothing was compiled, so there is nothing to free.  */
+            continue;
+          }
+        else if (tests[cnt].start == -2)
+          {
+            puts ("compiling suceeds, FAIL");
+            errors++;
+            /* regcomp() succeeded here, so release the compiled pattern
+               before moving on to the next row.  */
+            regfree (&re);
+            continue;
+          }
+
+        /* Run the actual test.  */
+        err = regexec (&re, tests[cnt].str, 20, match, 0);
+
+        if (err != 0)
+          {
+            /* A start value of -1 marks strings that must not match.  */
+            if (tests[cnt].start == -1)
+              puts ("no match, OK");
+            else
+              {
+                puts ("no match, FAIL");
+                ++errors;
+              }
+          }
+        else
+          {
+            /* The table stores a 1-based start and an end equal to rm_eo;
+               an expected empty match at offset 0 is written as 0, 0.  */
+            if (match[0].rm_so == 0 && tests[cnt].start == 0
+                && match[0].rm_eo == 0 && tests[cnt].end == 0)
+              puts ("match, OK");
+            else if (match[0].rm_so + 1 == tests[cnt].start
+                     && match[0].rm_eo == tests[cnt].end)
+              puts ("match, OK");
+            else
+              {
+                /* regoff_t need not be int; cast to match the %d
+                   conversions.  */
+                printf ("wrong match (%d to %d): FAIL\n",
+                        (int) match[0].rm_so, (int) match[0].rm_eo);
+                ++errors;
+              }
+          }
+
+        /* Free all resources.  */
+        regfree (&re);
+      }
+
+  /* cnt is a size_t; print it through unsigned long instead of the
+     obsolete, glibc-only `Z' length modifier.  */
+  printf ("\n%lu tests, %d errors\n", (unsigned long) cnt, errors);
+
+  return errors != 0;
+}
diff --git a/testsuite/runtest b/testsuite/runtest
new file mode 100755
index 0000000..0134a5d
--- /dev/null
+++ b/testsuite/runtest
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+# Run one test-suite target through Makefile.tests, showing what make
+# printed only when the target fails.
+
+# Defaults; each one applies only when the variable is unset.
+: ${MAKE=make}
+: ${srcdir=.}
+: ${SED="../sed/sed"}
+
+makefile="$srcdir/Makefile.tests"
+
+# Target name: the arguments with everything up to the last `/' removed.
+test=`echo "$@"| sed 's,.*/,,'`
+
+# Capture all of make's output.  On success discard it and leave with the
+# status of the `rm' (a bare `exit' reuses the last command's status).
+$MAKE SED="$SED" srcdir="$srcdir" -f "$makefile" $test > tmp.test 2>&1 && {
+  rm -f tmp.test
+  exit
+}
+
+# Failure: remember make's status, replay its output, clean up, and
+# propagate that status to the caller.
+exitcode=$?
+cat tmp.test
+rm -f tmp.test
+exit $exitcode
diff --git a/testsuite/runtests.c b/testsuite/runtests.c
new file mode 100644
index 0000000..9d74475
--- /dev/null
+++ b/testsuite/runtests.c
@@ -0,0 +1,138 @@
+/***********************************************************
+
+Copyright 1995 by Tom Lord
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of the copyright holder not be
+used in advertising or publicity pertaining to distribution of the
+software without specific, written prior permission.
+
+Tom Lord DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+EVENT SHALL TOM LORD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+******************************************************************/
+
+
+
+#include <sys/types.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+
+/* One test case, as listed in testcases.h.  */
+struct a_test
+{
+  /* Expected outcome: the value regexec() must return (0 for a match),
+     or 2 when regcomp() itself is expected to reject the pattern.  */
+  int expected;
+  /* Pattern; run_a_test() compiles it with REG_EXTENDED.  */
+  const char * pattern;
+  /* Subject string.  Plain `char', so that the string-literal
+     initializers and the regexec()/printf() calls involve no pointer
+     conversion between differently signed character types.  */
+  const char * data;
+};
+
+/* Every test case, followed by a sentinel entry that main() leaves out
+   of its default range.  */
+static const struct a_test the_tests[] =
+{
+#include "testcases.h"
+  {-1, 0, 0}
+};
+
+
+
+
+/* Run test case T (number ID, used only in diagnostics).
+
+   The compiled form of the most recent pattern is cached in a static
+   regex_t, so consecutive cases that share a pattern compile it once.
+   T->expected is either the value regexec() must return, or 2 when
+   regcomp() is expected to reject the pattern.
+
+   Prints " OK." or a failure report; returns 0 on success, 1 on failure.  */
+static int
+run_a_test (int id, const struct a_test * t)
+{
+  /* Pattern currently compiled in R, or NULL when R holds nothing.  */
+  static const char * last_pattern = 0;
+  static regex_t r;
+  int err;
+  char errmsg[100];
+  int x;
+  regmatch_t regs[10];
+
+  if (!last_pattern || strcmp (last_pattern, t->pattern))
+    {
+      if (last_pattern)
+        regfree (&r);
+      /* Until regcomp() succeeds R holds no compiled pattern; keeping the
+         cache marked empty stops a later call from freeing or executing a
+         regex that never compiled.  */
+      last_pattern = NULL;
+      err = regcomp (&r, t->pattern, REG_EXTENDED);
+      if (err)
+        {
+          if (t->expected == 2)
+            {
+              puts (" OK.");
+              return 0;
+            }
+          regerror (err, &r, errmsg, sizeof (errmsg));
+          printf (" FAIL: %s.\n", errmsg);
+          return 1;
+        }
+      last_pattern = t->pattern;
+      if (t->expected == 2)
+        {
+          printf ("test %d\n", id);
+          printf ("pattern \"%s\" successfull compilation not expected\n",
+                  t->pattern);
+          return 1;
+        }
+    }
+
+  err = regexec (&r, (const char *) t->data, 10, regs, 0);
+
+  if (err != t->expected)
+    {
+      printf ("test %d\n", id);
+      printf ("pattern \"%s\" data \"%s\" wanted %d got %d\n",
+              t->pattern, (const char *) t->data, t->expected, err);
+      /* regs[] is only filled in when regexec() reports a match; dumping
+         it after a failed call would read uninitialized storage.  */
+      if (err == 0)
+        for (x = 0; x < 10; ++x)
+          {
+            int so = (int) regs[x].rm_so;
+            int eo = (int) regs[x].rm_eo;
+
+            if (so < 0)
+              /* Group did not take part in the match: print the offsets
+                 but do not form a pointer in front of the string.  */
+              printf ("reg %d == (%d, %d) \n", x, so, eo);
+            else
+              printf ("reg %d == (%d, %d) %.*s\n",
+                      x, so, eo, eo - so, (const char *) t->data + so);
+          }
+      return 1;
+    }
+  puts (" OK.");
+  return 0;
+}
+
+
+
+/* Run the test cases in the_tests[].
+
+   With no arguments every case is run.  With one argument N only case N
+   is run; with two arguments LO HI the half-open range [LO, HI) is run.
+   The requested range is clamped to the cases that actually exist, so
+   the terminating sentinel entry is never executed.
+
+   Returns 0 when all selected cases passed, 1 otherwise.  */
+int
+main (int argc, char * argv[])
+{
+  /* Number of real cases; the final array element is the sentinel.  */
+  const int count = (int) (sizeof (the_tests) / sizeof (the_tests[0])) - 1;
+  int x;
+  int lo = 0;
+  int hi = count;
+  int res = 0;
+
+  if (argc > 1)
+    {
+      lo = atoi (argv[1]);
+      /* Single-test form; only add 1 when that cannot leave the table
+         (this also keeps the addition from overflowing).  */
+      hi = lo < count ? lo + 1 : count;
+
+      if (argc > 2)
+        hi = atoi (argv[2]);
+    }
+
+  /* Never index outside the_tests[], whatever was typed.  */
+  if (lo < 0)
+    lo = 0;
+  if (hi > count)
+    hi = count;
+
+  for (x = lo; x < hi; ++x)
+    {
+      printf ("#%d:", x);
+      res |= run_a_test (x, &the_tests[x]);
+    }
+  return res != 0;
+}
diff --git a/testsuite/sep.good b/testsuite/sep.good
new file mode 100644
index 0000000..7db0e1e
--- /dev/null
+++ b/testsuite/sep.good
@@ -0,0 +1,3 @@
+
+///
+//
diff --git a/testsuite/sep.inp b/testsuite/sep.inp
new file mode 100644
index 0000000..5795f4b
--- /dev/null
+++ b/testsuite/sep.inp
@@ -0,0 +1,3 @@
+miss mary mack mack//mack/ran down/the track track track
+slashes\aren't%used enough/in/casual-conversation///
+possibly sentences would be more attractive if they ended in two slashes//
diff --git a/testsuite/sep.sed b/testsuite/sep.sed
new file mode 100644
index 0000000..4864b81
--- /dev/null
+++ b/testsuite/sep.sed
@@ -0,0 +1,4 @@
+# inspired by an autoconf generated configure script.
+# Uses `%' and `,' as the s-command delimiter so that `/' can appear in
+# the regexes.
+# Strip a `/' followed by one or more non-slash characters at end of line.
+s%/[^/][^/]*$%%
+# The same kind of strip, with `\/' written inside the bracket expressions.
+s%[\/][^\/][^\/]*$%%
+# Delete everything up to and including the last character that the
+# bracket expression `[^\/]' matches.
+s,.*[^\/],,
diff --git a/testsuite/space.good b/testsuite/space.good
new file mode 100644
index 0000000..9b267aa
--- /dev/null
+++ b/testsuite/space.good
@@ -0,0 +1,2 @@
+Hello_World_!
+SecondXXine__of_tests
diff --git a/testsuite/space.inp b/testsuite/space.inp
new file mode 100644
index 0000000..83b0adb
--- /dev/null
+++ b/testsuite/space.inp
@@ -0,0 +1,2 @@
+Hello World !
+Second_line_ of tests
diff --git a/testsuite/space.sed b/testsuite/space.sed
new file mode 100644
index 0000000..0bfa522
--- /dev/null
+++ b/testsuite/space.sed
@@ -0,0 +1 @@
+s/_\S/XX/g;s/\s/_/g
diff --git a/testsuite/subwrite.inp b/testsuite/subwrite.inp
new file mode 100644
index 0000000..3e910cc
--- /dev/null
+++ b/testsuite/subwrite.inp
@@ -0,0 +1,4 @@
+Not some church, and not the state,
+Not some dark capricious fate.
+Who you are, and when you lose,
+Comes only from the things you choose.
diff --git a/testsuite/subwrite.sed b/testsuite/subwrite.sed
new file mode 100644
index 0000000..1a4a01d
--- /dev/null
+++ b/testsuite/subwrite.sed
@@ -0,0 +1 @@
+s/you/YoU/w subwrite.wout
diff --git a/testsuite/subwrt1.good b/testsuite/subwrt1.good
new file mode 100644
index 0000000..560b698
--- /dev/null
+++ b/testsuite/subwrt1.good
@@ -0,0 +1,4 @@
+Not some church, and not the state,
+Not some dark capricious fate.
+Who YoU are, and when you lose,
+Comes only from the things YoU choose.
diff --git a/testsuite/subwrt2.good b/testsuite/subwrt2.good
new file mode 100644
index 0000000..c87bb68
--- /dev/null
+++ b/testsuite/subwrt2.good
@@ -0,0 +1,2 @@
+Who YoU are, and when you lose,
+Comes only from the things YoU choose.
diff --git a/testsuite/testcases.h b/testsuite/testcases.h
new file mode 100644
index 0000000..834f530
--- /dev/null
+++ b/testsuite/testcases.h
@@ -0,0 +1,167 @@
+ {0, "(.*)*\\1", "xx"},
+ {0, "^", ""},
+ {0, "$", ""},
+ {0, "^$", ""},
+ {0, "^a$", "a"},
+ {0, "abc", "abc"},
+ {1, "abc", "xbc"},
+ {1, "abc", "axc"},
+ {1, "abc", "abx"},
+ {0, "abc", "xabcy"},
+ {0, "abc", "ababc"},
+ {0, "ab*c", "abc"},
+ {0, "ab*bc", "abc"},
+ {0, "ab*bc", "abbc"},
+ {0, "ab*bc", "abbbbc"},
+ {0, "ab+bc", "abbc"},
+ {1, "ab+bc", "abc"},
+ {1, "ab+bc", "abq"},
+ {0, "ab+bc", "abbbbc"},
+ {0, "ab?bc", "abbc"},
+ {0, "ab?bc", "abc"},
+ {1, "ab?bc", "abbbbc"},
+ {0, "ab?c", "abc"},
+ {0, "^abc$", "abc"},
+ {1, "^abc$", "abcc"},
+ {0, "^abc", "abcc"},
+ {1, "^abc$", "aabc"},
+ {0, "abc$", "aabc"},
+ {0, "^", "abc"},
+ {0, "$", "abc"},
+ {0, "a.c", "abc"},
+ {0, "a.c", "axc"},
+ {0, "a.*c", "axyzc"},
+ {1, "a.*c", "axyzd"},
+ {1, "a[bc]d", "abc"},
+ {0, "a[bc]d", "abd"},
+ {1, "a[b-d]e", "abd"},
+ {0, "a[b-d]e", "ace"},
+ {0, "a[b-d]", "aac"},
+ {0, "a[-b]", "a-"},
+ {0, "a[b-]", "a-"},
+ {2, "a[b-a]", "-"},
+ {2, "a[]b", "-"},
+ {2, "a[", "-"},
+ {0, "a]", "a]"},
+ {0, "a[]]b", "a]b"},
+ {0, "a[^bc]d", "aed"},
+ {1, "a[^bc]d", "abd"},
+ {0, "a[^-b]c", "adc"},
+ {1, "a[^-b]c", "a-c"},
+ {1, "a[^]b]c", "a]c"},
+ {0, "a[^]b]c", "adc"},
+ {0, "ab|cd", "abc"},
+ {0, "ab|cd", "abcd"},
+ {0, "()ef", "def"},
+ {0, "()*", "-"},
+ {2, "*a", "-"},
+ {2, "^*", "-"},
+ {2, "$*", "-"},
+ {2, "(*)b", "-"},
+ {1, "$b", "b"},
+ {2, "a\\", "-"},
+ {0, "a\\(b", "a(b"},
+ {0, "a\\(*b", "ab"},
+ {0, "a\\(*b", "a((b"},
+ {1, "a\\x", "a\\x"},
+ {1, "abc)", "-"},
+ {2, "(abc", "-"},
+ {0, "((a))", "abc"},
+ {0, "(a)b(c)", "abc"},
+ {0, "a+b+c", "aabbabc"},
+ {0, "a**", "-"},
+ {0, "a*?", "-"},
+ {0, "(a*)*", "-"},
+ {0, "(a*)+", "-"},
+ {0, "(a|)*", "-"},
+ {0, "(a*|b)*", "-"},
+ {0, "(a+|b)*", "ab"},
+ {0, "(a+|b)+", "ab"},
+ {0, "(a+|b)?", "ab"},
+ {0, "[^ab]*", "cde"},
+ {0, "(^)*", "-"},
+ {0, "(ab|)*", "-"},
+ {2, ")(", "-"},
+ {1, "abc", ""},
+ {1, "abc", ""},
+ {0, "a*", ""},
+ {0, "([abc])*d", "abbbcd"},
+ {0, "([abc])*bcd", "abcd"},
+ {0, "a|b|c|d|e", "e"},
+ {0, "(a|b|c|d|e)f", "ef"},
+ {0, "((a*|b))*", "-"},
+ {0, "abcd*efg", "abcdefg"},
+ {0, "ab*", "xabyabbbz"},
+ {0, "ab*", "xayabbbz"},
+ {0, "(ab|cd)e", "abcde"},
+ {0, "[abhgefdc]ij", "hij"},
+ {1, "^(ab|cd)e", "abcde"},
+ {0, "(abc|)ef", "abcdef"},
+ {0, "(a|b)c*d", "abcd"},
+ {0, "(ab|ab*)bc", "abc"},
+ {0, "a([bc]*)c*", "abc"},
+ {0, "a([bc]*)(c*d)", "abcd"},
+ {0, "a([bc]+)(c*d)", "abcd"},
+ {0, "a([bc]*)(c+d)", "abcd"},
+ {0, "a[bcd]*dcdcde", "adcdcde"},
+ {1, "a[bcd]+dcdcde", "adcdcde"},
+ {0, "(ab|a)b*c", "abc"},
+ {0, "((a)(b)c)(d)", "abcd"},
+ {0, "[A-Za-z_][A-Za-z0-9_]*", "alpha"},
+ {0, "^a(bc+|b[eh])g|.h$", "abh"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "effgz"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "ij"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "effg"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "bcdd"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "reffgz"},
+ {1, "((((((((((a))))))))))", "-"},
+ {0, "(((((((((a)))))))))", "a"},
+ {1, "multiple words of text", "uh-uh"},
+ {0, "multiple words", "multiple words, yeah"},
+ {0, "(.*)c(.*)", "abcde"},
+ {1, "\\((.*),", "(.*)\\)"},
+ {1, "[k]", "ab"},
+ {0, "abcd", "abcd"},
+ {0, "a(bc)d", "abcd"},
+ {0, "a[-]?c", "ac"},
+ {0, "(....).*\\1", "beriberi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamer El Kazzafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar al-Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar el-Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'amar al-Kadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafy"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Quathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gheddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Al-Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Khadafy "},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Qudhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi"},
+ {0, "[[:digit:]]+", "01234"},
+ {1, "[[:alpha:]]+", "01234"},
+ {0, "^[[:digit:]]*$", "01234"},
+ {1, "^[[:digit:]]*$", "01234a"},
+ {0, "^[[:alnum:]]*$", "01234a"},
+ {0, "^[[:xdigit:]]*$", "01234a"},
+ {1, "^[[:xdigit:]]*$", "01234g"},
+ {0, "^[[:alnum:][:space:]]*$", "Hello world"},
diff --git a/testsuite/tst-boost.c b/testsuite/tst-boost.c
new file mode 100644
index 0000000..8446a2e
--- /dev/null
+++ b/testsuite/tst-boost.c
@@ -0,0 +1,227 @@
+/* Regular expression tests.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Decode backslash escapes in SRC, rewriting the string in place.
+   "\t", "\n" and "\r" always become the matching control character.
+   When PATTERN is zero, a backslash followed by one of \ ^ { | } is
+   reduced to that second character; when PATTERN is nonzero those
+   sequences are left as they are.  Any other backslash is copied
+   through unchanged.  The result is never longer than the input.  */
+void
+frob_escapes (char *src, int pattern)
+{
+  const char *in = src;
+  char *out = src;
+
+  while (*in != '\0')
+    {
+      char ch = *in;
+
+      if (ch == '\\')
+        {
+          char next = in[1];
+
+          if (next == 't' || next == 'n' || next == 'r')
+            {
+              ch = next == 't' ? '\t' : next == 'n' ? '\n' : '\r';
+              in++;
+            }
+          else if (!pattern && next != '\0'
+                   && strchr ("\\^{|}", next) != NULL)
+            {
+              /* Plain-string mode: drop the backslash, keep the char.  */
+              ch = next;
+              in++;
+            }
+        }
+
+      *out++ = ch;
+      in++;
+    }
+  *out = '\0';
+}
+/* Regex test driver.  Reads test cases from the file named by argv[1],
+   echoing every input line to stdout.  Lines starting with ';' and
+   empty lines are skipped.  A line starting with '-' selects the flags
+   by the names it contains (REG_BASIC, REG_ICASE, REG_NEWLINE,
+   REG_NOTBOL, REG_NOTEOL).  Any other line holds a pattern, a subject
+   string (optionally double-quoted, or starting with '!' when regcomp
+   is expected to fail) and the expected start/end offsets of the match
+   registers.  Returns 0 if every case passed, 1 otherwise, including
+   when the file name is missing or the file cannot be opened.  */
+int
+main (int argc, char **argv)
+{
+ int ret = 0, n;
+ char *line = NULL;
+ size_t line_len = 0;
+ ssize_t len;
+ FILE *f;
+ char *pattern, *string;
+ int flags = REG_EXTENDED;
+ int eflags = 0;
+ regex_t re;
+ regmatch_t rm[20];
+
+ mtrace ();
+
+ if (argc < 2)
+ {
+ fprintf (stderr, "Missing test filename\n");
+ return 1;
+ }
+
+ f = fopen (argv[1], "r");
+ if (f == NULL)
+ {
+ fprintf (stderr, "Couldn't open %s\n", argv[1]);
+ return 1;
+ }
+
+ while ((len = getline (&line, &line_len, f)) > 0)
+ {
+ char *p, *q;
+ int i;
+
+ if (line[len - 1] == '\n') /* strip the trailing newline */
+ line[--len] = '\0';
+
+ puts (line);
+
+ if (line[0] == ';') /* lines starting with ';' are skipped */
+ continue;
+
+ if (line[0] == '\0')
+ continue;
+
+ if (line[0] == '-') /* flag line: recompute flags/eflags from the names present */
+ {
+ if (strstr (line, "REG_BASIC"))
+ flags = 0;
+ else
+ flags = REG_EXTENDED;
+ if (strstr (line, "REG_ICASE"))
+ flags |= REG_ICASE;
+ if (strstr (line, "REG_NEWLINE"))
+ flags |= REG_NEWLINE;
+ eflags = 0;
+ if (strstr (line, "REG_NOTBOL"))
+ eflags |= REG_NOTBOL;
+ if (strstr (line, "REG_NOTEOL"))
+ eflags |= REG_NOTEOL;
+ continue;
+ }
+
+ pattern = line + strspn (line, " \t"); /* first field: the pattern */
+ if (*pattern == '\0')
+ continue;
+ p = pattern + strcspn (pattern, " \t");
+ if (*p == '\0') /* pattern with nothing after it: skip the line */
+ continue;
+ *p++ = '\0';
+
+ string = p + strspn (p, " \t"); /* second field: the subject string */
+ if (*string == '\0')
+ continue;
+ if (*string == '"') /* quoted subject: runs up to the next double quote */
+ {
+ string++;
+ p = strchr (string, '"');
+ if (p == NULL)
+ continue;
+ *p++ = '\0';
+ }
+ else
+ {
+ p = string + strcspn (string, " \t");
+ if (*string == '!') /* '!' means the pattern is expected not to compile */
+ string = NULL;
+ else if (*p == '\0')
+ continue;
+ else
+ *p++ = '\0';
+ }
+
+ frob_escapes (pattern, 1);
+ if (string != NULL)
+ frob_escapes (string, 0);
+
+ n = regcomp (&re, pattern, flags);
+ if (n != 0)
+ {
+ if (string != NULL) /* a compile failure is only an error when a subject was given */
+ {
+ char buf[500];
+ regerror (n, &re, buf, sizeof (buf));
+ printf ("FAIL regcomp unexpectedly failed: %s\n",
+ buf);
+ ret = 1;
+ }
+ continue;
+ }
+ else if (string == NULL)
+ {
+ regfree (&re);
+ puts ("FAIL regcomp unpexpectedly succeeded"); /* NOTE(review): "unpexpectedly" is misspelled in the emitted text */
+ ret = 1;
+ continue;
+ }
+
+ if (regexec (&re, string, 20, rm, eflags)) /* nonzero: no match, so mark all registers unset */
+ {
+ for (i = 0; i < 20; ++i)
+ {
+ rm[i].rm_so = -1;
+ rm[i].rm_eo = -1;
+ }
+ }
+
+ regfree (&re);
+
+ for (i = 0; i < 20 && *p != '\0'; ++i) /* rest of the line: one "start end" pair per register */
+ {
+ int rm_so, rm_eo;
+
+ rm_so = strtol (p, &q, 10);
+ if (p == q) /* no more numbers */
+ break;
+ p = q;
+
+ rm_eo = strtol (p, &q, 10);
+ if (p == q)
+ break;
+ p = q;
+
+ if (rm[i].rm_so != rm_so || rm[i].rm_eo != rm_eo)
+ {
+ printf ("FAIL rm[%d] %d..%d != expected %d..%d\n",
+ i, rm[i].rm_so, rm[i].rm_eo, rm_so, rm_eo);
+ ret = 1;
+ break;
+ }
+ }
+ }
+
+ free (line);
+ fclose (f);
+ return ret;
+}
diff --git a/testsuite/tst-pcre.c b/testsuite/tst-pcre.c
new file mode 100644
index 0000000..3780a09
--- /dev/null
+++ b/testsuite/tst-pcre.c
@@ -0,0 +1,241 @@
+/* Regular expression tests.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* PCRE-format regression driver.
+
+   argv[1] names the test file, whose first line must start with
+   "# PCRE".  After that:
+     - lines starting with '#' are ignored;
+     - an empty line ends the current test and clears its state;
+     - "/pattern/" or "/pattern/i" records a pattern ('i' selects
+       REG_ICASE);
+     - a line starting with four spaces is a subject string: the
+       recorded pattern is compiled with REG_EXTENDED and executed
+       against the rest of the line;
+     - "No match" asserts that the last execution did not match;
+     - "N: text" (optionally preceded by one space) asserts that match
+       register N covers exactly "text"; "N: <unset>" asserts that the
+       register is unset.
+
+   Returns 0 when every check passed and 1 otherwise.  */
+int
+main (int argc, char **argv)
+{
+  int ret = 0;
+  char *line = NULL;
+  size_t line_len = 0;
+  ssize_t len;
+  FILE *f;
+  char *pattern = NULL, *string = NULL;
+  regmatch_t rm[20];
+  size_t pattern_alloced = 0, string_alloced = 0;
+  int ignorecase = 0;
+  int pattern_valid = 0, rm_valid = 0;
+  size_t linenum;
+
+  mtrace ();
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "Missing test filename\n");
+      return 1;
+    }
+
+  f = fopen (argv[1], "r");
+  if (f == NULL)
+    {
+      fprintf (stderr, "Couldn't open %s\n", argv[1]);
+      return 1;
+    }
+
+  /* The first line identifies the file format.  */
+  if ((len = getline (&line, &line_len, f)) <= 0
+      || strncmp (line, "# PCRE", 6) != 0)
+    {
+      fprintf (stderr, "Not a PCRE test file\n");
+      fclose (f);
+      free (line);
+      return 1;
+    }
+
+  linenum = 1;
+
+  while ((len = getline (&line, &line_len, f)) > 0)
+    {
+      char *p;
+      unsigned long num;
+
+      ++linenum;
+
+      if (line[len - 1] == '\n')
+        line[--len] = '\0';
+
+      if (line[0] == '#')
+        continue;
+
+      if (line[0] == '\0')
+        {
+          /* End of test.  */
+          ignorecase = 0;
+          pattern_valid = 0;
+          rm_valid = 0;
+          continue;
+        }
+
+      if (line[0] == '/')
+        {
+          /* Pattern.  The closing delimiter is the last '/' on the line,
+             so the pattern itself may contain slashes.  */
+          p = strrchr (line + 1, '/');
+
+          pattern_valid = 0;
+          rm_valid = 0;
+          if (p == NULL)
+            {
+              printf ("%zu: Invalid pattern line: %s\n", linenum, line);
+              ret = 1;
+              continue;
+            }
+
+          if (p[1] == 'i' && p[2] == '\0')
+            ignorecase = 1;
+          else if (p[1] != '\0')
+            {
+              printf ("%zu: Invalid pattern line: %s\n", linenum, line);
+              ret = 1;
+              continue;
+            }
+
+          /* p - line bytes hold the pattern text plus its NUL.  */
+          if (pattern_alloced < (size_t) (p - line))
+            {
+              pattern = realloc (pattern, p - line);
+              if (pattern == NULL)
+                {
+                  printf ("%zu: Cannot record pattern: %m\n", linenum);
+                  ret = 1;
+                  break;
+                }
+              pattern_alloced = p - line;
+            }
+
+          memcpy (pattern, line + 1, p - line - 1);
+          pattern[p - line - 1] = '\0';
+          pattern_valid = 1;
+          continue;
+        }
+
+      /* Subject lines are introduced by exactly the four-space prefix
+         that the length argument and the line + 4 below assume.  */
+      if (strncmp (line, "    ", 4) == 0)
+        {
+          regex_t re;
+          int n;
+
+          if (!pattern_valid)
+            {
+              printf ("%zu: No previous valid pattern %s\n", linenum, line);
+              continue;
+            }
+
+          /* len - 3 bytes: the text after the prefix plus its NUL.  */
+          if (string_alloced < (size_t) (len - 3))
+            {
+              string = realloc (string, len - 3);
+              if (string == NULL)
+                {
+                  printf ("%zu: Cannot record search string: %m\n", linenum);
+                  ret = 1;
+                  break;
+                }
+              string_alloced = len - 3;
+            }
+
+          memcpy (string, line + 4, len - 3);
+
+          n = regcomp (&re, pattern,
+                       REG_EXTENDED | (ignorecase ? REG_ICASE : 0));
+          if (n != 0)
+            {
+              char buf[500];
+              regerror (n, &re, buf, sizeof (buf));
+              printf ("%zu: regcomp failed for %s: %s\n",
+                      linenum, pattern, buf);
+              ret = 1;
+              continue;
+            }
+
+          if (regexec (&re, string, 20, rm, 0))
+            {
+              /* No match: mark every register unset, so that a later
+                 "N: ..." line never reads values left over from an
+                 earlier match (or never initialised at all).  */
+              size_t i;
+
+              for (i = 0; i < sizeof (rm) / sizeof (rm[0]); ++i)
+                {
+                  rm[i].rm_so = -1;
+                  rm[i].rm_eo = -1;
+                }
+            }
+
+          regfree (&re);
+          rm_valid = 1;
+          continue;
+        }
+
+      /* Everything below checks results of the last regexec.  */
+      if (!rm_valid)
+        {
+          printf ("%zu: No preceeding pattern or search string\n", linenum);
+          ret = 1;
+          continue;
+        }
+
+      if (strcmp (line, "No match") == 0)
+        {
+          if (rm[0].rm_so != -1 || rm[0].rm_eo != -1)
+            {
+              printf ("%zu: /%s/ on %s unexpectedly matched %d..%d\n",
+                      linenum, pattern, string, rm[0].rm_so, rm[0].rm_eo);
+              ret = 1;
+            }
+
+          continue;
+        }
+
+      /* "N: text" register line, optionally preceded by one space.  */
+      p = line;
+      if (*p == ' ')
+        ++p;
+
+      num = strtoul (p, &p, 10);
+      if (num >= 20 || *p != ':' || p[1] != ' ')
+        {
+          printf ("%zu: Invalid line %s\n", linenum, line);
+          ret = 1;
+          continue;
+        }
+
+      if (rm[num].rm_so == -1 || rm[num].rm_eo == -1)
+        {
+          if (strcmp (p + 2, "<unset>") != 0)
+            {
+              printf ("%zu: /%s/ on %s unexpectedly failed to match register %lu %d..%d\n",
+                      linenum, pattern, string, num,
+                      rm[num].rm_so, rm[num].rm_eo);
+              ret = 1;
+            }
+          continue;
+        }
+
+      /* The register must span exactly the expected text: same length
+         as the rest of the line and the same bytes.  */
+      if (rm[num].rm_eo < rm[num].rm_so
+          || rm[num].rm_eo - rm[num].rm_so != len - (p + 2 - line)
+          || strncmp (p + 2, string + rm[num].rm_so,
+                      rm[num].rm_eo - rm[num].rm_so) != 0)
+        {
+          printf ("%zu: /%s/ on %s unexpectedly failed to match %s for register %lu %d..%d\n",
+                  linenum, pattern, string, p + 2, num,
+                  rm[num].rm_so, rm[num].rm_eo);
+          ret = 1;
+          continue;
+        }
+    }
+
+  free (pattern);
+  free (string);
+  free (line);
+  fclose (f);
+  return ret;
+}
diff --git a/testsuite/tst-regex b/testsuite/tst-regex
new file mode 100755
index 0000000..5d596c5
--- /dev/null
+++ b/testsuite/tst-regex
Binary files differ
diff --git a/testsuite/tst-regex.c b/testsuite/tst-regex.c
new file mode 100644
index 0000000..3c62b66
--- /dev/null
+++ b/testsuite/tst-regex.c
@@ -0,0 +1,265 @@
+/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define _GNU_SOURCE 1
+#include <alloca.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <iconv.h>
+#include <locale.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <regex.h>
+
+
+static iconv_t cd;
+static char *mem;
+static char *umem;
+static size_t memlen;
+static size_t umemlen;
+static int test_expr (const char *expr, int expected, int expectedicase);
+static int run_test (const char *expr, const char *mem, size_t memlen,
+ int icase, int expected);
+static int run_test_backwards (const char *expr, const char *mem,
+ size_t memlen, int icase, int expected);
+
+/* Regex matching test over a Latin-1 text file.  Loads the file named
+   by argv[1] into the global `mem' (NUL-terminated, `memlen' bytes),
+   converts it from ISO-8859-1 to UTF-8 into `umem'/`umemlen', then
+   runs test_expr on a fixed list of expressions with their expected
+   match counts.  Returns the OR of all test_expr results (0 when all
+   passed); setup failures end the program through exit/error.  */
+int
+main (int argc, char *argv[])
+{
+ int fd;
+ struct stat st;
+ int result;
+ char *inmem;
+ char *outmem;
+ size_t inlen;
+ size_t outlen;
+
+ mtrace ();
+
+ if (!argv[1]) /* no input file given */
+ exit (1);
+
+ /* Make the content of the file available in memory. */
+ fd = open (argv[1], O_RDONLY);
+ if (fd == -1)
+ error (EXIT_FAILURE, errno, "cannot open %s", basename (argv[1]));
+
+ if (fstat (fd, &st) != 0)
+ error (EXIT_FAILURE, errno, "cannot stat %s", basename (argv[1]));
+ memlen = st.st_size;
+
+ mem = (char *) malloc (memlen + 1); /* one extra byte for the NUL stored below */
+ if (mem == NULL)
+ error (EXIT_FAILURE, errno, "while allocating buffer");
+
+ if ((size_t) read (fd, mem, memlen) != memlen) /* a short read is treated as fatal */
+ error (EXIT_FAILURE, 0, "cannot read entire file");
+ mem[memlen] = '\0';
+
+ close (fd);
+
+ /* We have to convert a few things from Latin-1 to UTF-8. */
+ cd = iconv_open ("UTF-8", "ISO-8859-1");
+ if (cd == (iconv_t) -1)
+ error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
+
+ /* For the second test we have to convert the file content to UTF-8.
+ Since the text is mostly ASCII it should be enough to allocate
+ twice as much memory for the UTF-8 text than for the Latin-1
+ text. */
+ umem = (char *) calloc (2, memlen);
+ if (umem == NULL)
+ error (EXIT_FAILURE, errno, "while allocating buffer");
+
+ inmem = mem;
+ inlen = memlen;
+ outmem = umem;
+ outlen = 2 * memlen - 1; /* the last byte of the zeroed buffer is never written */
+ iconv (cd, &inmem, &inlen, &outmem, &outlen);
+ umemlen = outmem - umem;
+ if (inlen != 0) /* some input was left unconverted */
+ error (EXIT_FAILURE, errno, "cannot convert buffer");
+
+#ifdef DEBUG
+ re_set_syntax (RE_DEBUG);
+#endif
+
+ /* Run the actual tests. All tests are run in a single-byte and a
+ multi-byte locale.
+ NOTE(review): test_expr as written in this file only performs the
+ 8-bit runs on mem; umem is converted above but not passed to it. */
+ result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 2, 2);
+ result |= test_expr ("G.ran", 2, 3);
+ result |= test_expr ("G.\\{1\\}ran", 2, 3);
+ result |= test_expr ("G.*ran", 3, 44);
+ result |= test_expr ("[äáàâ]", 0, 0);
+ result |= test_expr ("Uddeborg", 2, 2);
+ result |= test_expr (".Uddeborg", 2, 2);
+
+ /* Free the resources. */
+ free (umem);
+ iconv_close (cd);
+ free (mem);
+
+ return result;
+}
+
+
+/* Run EXPR over the global buffer mem/memlen four ways: forwards and
+   backwards, each case-sensitive (EXPECTED matches) and
+   case-insensitive (EXPECTEDICASE matches).  A banner is printed
+   before each run.  Returns zero only if all four runs produced the
+   expected count.  */
+static int
+test_expr (const char *expr, int expected, int expectedicase)
+{
+  int forward, forward_icase, backward, backward_icase;
+
+  printf ("\nTest \"%s\" with 8-bit locale\n", expr);
+  forward = run_test (expr, mem, memlen, 0, expected);
+
+  printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
+  forward_icase = run_test (expr, mem, memlen, 1, expectedicase);
+
+  printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
+  backward = run_test_backwards (expr, mem, memlen, 0, expected);
+
+  printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
+          expr);
+  backward_icase = run_test_backwards (expr, mem, memlen, 1, expectedicase);
+
+  return forward | forward_icase | backward | backward_icase;
+}
+
+
+/* Compile EXPR with REG_NEWLINE (plus REG_ICASE when ICASE is nonzero)
+   and count the lines of MEM that contain a match.  MEM must hold
+   MEMLEN bytes followed by a NUL.  Each matching line is printed.
+   Returns zero when the count equals EXPECTED and nonzero otherwise;
+   compile or execution errors terminate the program via error().  */
+static int
+run_test (const char *expr, const char *mem, size_t memlen, int icase,
+          int expected)
+{
+  regex_t re;
+  char msg[200];
+  int cflags = REG_NEWLINE;
+  int status;
+  int found = 0;
+  size_t pos = 0;
+
+  if (icase)
+    cflags |= REG_ICASE;
+
+  status = regcomp (&re, expr, cflags);
+  if (status != REG_NOERROR)
+    {
+      regerror (status, &re, msg, sizeof msg);
+      error (EXIT_FAILURE, 0, "cannot compile expression: %s", msg);
+    }
+
+  assert (mem[memlen] == '\0');
+
+  for (;;)
+    {
+      regmatch_t ma[1];
+      const char *hit;
+      const char *line_start;
+      const char *line_end;
+
+      if (pos >= memlen)
+        break;
+
+      status = regexec (&re, mem + pos, 1, ma, 0);
+      if (status == REG_NOMATCH)
+        break;
+
+      if (status != REG_NOERROR)
+        {
+          regerror (status, &re, msg, sizeof msg);
+          error (EXIT_FAILURE, 0, "cannot use expression: %s", msg);
+        }
+
+      assert (ma[0].rm_so >= 0);
+      hit = mem + pos + ma[0].rm_so;
+
+      /* Widen the match start to the whole line containing it.  */
+      line_start = hit;
+      while (line_start > mem && line_start[-1] != '\n')
+        --line_start;
+
+      line_end = hit;
+      while (*line_end != '\0' && *line_end != '\n')
+        ++line_end;
+
+      ++found;
+      printf ("match %d: \"%.*s\"\n", found,
+              (int) (line_end - line_start), line_start);
+
+      /* Resume just past this line so it is counted only once.  */
+      pos = line_end + 1 - mem;
+    }
+
+  regfree (&re);
+
+  /* Nonzero means the number of matching lines is not what we expect.  */
+  return found != expected;
+}
+
+/* Count the lines of MEM that match EXPR, like run_test, but through
+   the GNU re_compile_pattern/re_search interface and walking from the
+   end of the buffer toward its start.  MEM must hold MEMLEN bytes
+   followed by a NUL.  ICASE nonzero adds RE_ICASE to the syntax.  Each
+   matching line is printed.  Returns zero when the count equals
+   EXPECTED, nonzero otherwise; setup and search errors terminate the
+   program via error().  */
+static int
+run_test_backwards (const char *expr, const char *mem, size_t memlen,
+ int icase, int expected)
+{
+ regex_t re;
+ const char *err;
+ size_t offset;
+ int cnt;
+
+ re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
+ | RE_HAT_LISTS_NOT_NEWLINE
+ | (icase ? RE_ICASE : 0));
+
+ memset (&re, 0, sizeof (re)); /* start from an all-zero pattern buffer */
+ re.fastmap = malloc (256);
+ if (re.fastmap == NULL)
+ error (EXIT_FAILURE, errno, "cannot allocate fastmap");
+
+ err = re_compile_pattern (expr, strlen (expr), &re); /* non-NULL result is an error message */
+ if (err != NULL)
+ error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);
+
+ if (re_compile_fastmap (&re))
+ error (EXIT_FAILURE, 0, "couldn't compile fastmap");
+
+ cnt = 0;
+ offset = memlen; /* begin at the end of the buffer */
+ assert (mem[memlen] == '\0');
+ while (offset <= memlen)
+ {
+ int start;
+ const char *sp;
+ const char *ep;
+
+ start = re_search (&re, mem, memlen, offset, -offset, NULL); /* negative range: try start positions from offset down to 0 (GNU re_search convention) */
+ if (start == -1) /* no further match */
+ break;
+
+ if (start == -2)
+ error (EXIT_FAILURE, 0, "internal error in re_search");
+
+ sp = mem + start; /* widen the match start to the enclosing line: sp..ep */
+ while (sp > mem && sp[-1] != '\n')
+ --sp;
+
+ ep = mem + start;
+ while (*ep != '\0' && *ep != '\n')
+ ++ep;
+
+ printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
+
+ offset = sp - 1 - mem; /* continue before this line; if sp == mem this wraps to a huge size_t and the loop test ends the scan */
+ }
+
+ regfree (&re);
+
+ /* Return an error if the number of matches found is not what we
+ expect. */
+ return cnt != expected;
+}
diff --git a/testsuite/tst-rxspencer.c b/testsuite/tst-rxspencer.c
new file mode 100644
index 0000000..1d3e90e
--- /dev/null
+++ b/testsuite/tst-rxspencer.c
@@ -0,0 +1,551 @@
+/* Regular expression tests.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+#include <getopt.h>
+
+/* Expand the test file's placeholder letters in STR in place:
+   'N' -> newline, 'T' -> tab, 'S' -> space, 'Z' -> NUL.  Scanning
+   resumes at the byte after each replacement, so text following a
+   'Z' (now a NUL) is still processed.  */
+static void
+replace_special_chars (char *str)
+{
+  char *hit = strpbrk (str, "NTSZ");
+
+  while (hit != NULL)
+    {
+      if (*hit == 'N')
+        *hit = '\n';
+      else if (*hit == 'T')
+        *hit = '\t';
+      else if (*hit == 'S')
+        *hit = ' ';
+      else
+        *hit = '\0';		/* 'Z' */
+
+      hit = strpbrk (hit + 1, "NTSZ");
+    }
+}
+
+/* Rewrite STR in place, turning each "[[:<:]]" into "\<" and each
+   "[[:>:]]" into "\>".  Other bracket expressions such as
+   "[[:alpha:]]" are left alone.  The string only ever shrinks.  */
+static void
+glibc_re_syntax (char *str)
+{
+  /* One past the terminating NUL, so that moves carry the NUL along.  */
+  char *limit = str + strlen (str) + 1;
+  char *cur = strstr (str, "[[:");
+
+  while (cur != NULL)
+    {
+      int word_edge = ((cur[3] == '<' || cur[3] == '>')
+                       && strncmp (cur + 4, ":]]", 3) == 0);
+
+      if (!word_edge)
+        cur += 3;
+      else
+        {
+          cur[0] = '\\';
+          cur[1] = cur[3];
+          /* Close the five-byte gap left by the shorter spelling.  */
+          memmove (cur + 2, cur + 7, limit - (cur + 7));
+          limit -= 5;
+          cur += 2;
+        }
+
+      cur = strstr (cur, "[[:");
+    }
+}
+
+/* Store at DST the two-byte sequence that stands in for the ASCII
+   letter C and return the position just past it:
+     a/A -> C3 A1 / C3 81   (\'a, \'A)
+     b/B -> C4 8D / C4 8C   (\v{c}, \v{C})
+     c/C -> C4 8F / C4 8E   (\v{d}, \v{D})
+     d/D -> C3 A9 / C3 89   (\'e, \'E)
+   For any other character nothing is written and DST is returned
+   unchanged.  */
+static char *
+mb_replace (char *dst, const char c)
+{
+  static const struct
+  {
+    char ascii;
+    char bytes[2];
+  } table[] =
+    {
+      { 'a', { '\xc3', '\xa1' } },
+      { 'A', { '\xc3', '\x81' } },
+      { 'b', { '\xc4', '\x8d' } },
+      { 'B', { '\xc4', '\x8c' } },
+      { 'c', { '\xc4', '\x8f' } },
+      { 'C', { '\xc4', '\x8e' } },
+      { 'd', { '\xc3', '\xa9' } },
+      { 'D', { '\xc3', '\x89' } }
+    };
+  size_t k;
+
+  for (k = 0; k < sizeof (table) / sizeof (table[0]); ++k)
+    if (table[k].ascii == c)
+      {
+        dst[0] = table[k].bytes[0];
+        dst[1] = table[k].bytes[1];
+        return dst + 2;
+      }
+
+  return dst;
+}
+
+/* Return a newly malloc'ed copy of STR in which every character that
+   occurs in LETTERS has been passed through mb_replace.  Returns NULL
+   when STR is NULL or when allocation fails.  The caller frees the
+   result.  */
+static char *
+mb_frob_string (const char *str, const char *letters)
+{
+  char *buf, *out;
+  size_t i, n;
+
+  if (str == NULL)
+    return NULL;
+
+  n = strlen (str);
+  /* Each input byte yields at most two output bytes, plus the NUL.  */
+  buf = malloc (2 * n + 1);
+  if (buf == NULL)
+    return NULL;
+
+  out = buf;
+  for (i = 0; i < n; ++i)
+    {
+      if (strchr (letters, str[i]) != NULL)
+        out = mb_replace (out, str[i]);
+      else
+        *out++ = str[i];
+    }
+  *out = '\0';
+
+  return buf;
+}
+
+/* Pattern variant of mb_frob_string: characters from LETTERS are
+   passed through mb_replace, except inside a "[:...:]", "[....]" or
+   "[=...=]" element, whose contents are copied verbatim.  Returns a
+   newly malloc'ed string, or NULL when STR is NULL or allocation
+   fails.  */
+
+static char *
+mb_frob_pattern (const char *str, const char *letters)
+{
+  char *buf, *out;
+  const char *in;
+  int inside = 0;		/* nonzero while within [: :], [. .] or [= =] */
+
+  if (str == NULL)
+    return NULL;
+
+  buf = malloc (2 * strlen (str) + 1);
+  if (buf == NULL)
+    return NULL;
+
+  out = buf;
+  in = str;
+  while (*in != '\0')
+    {
+      if (!inside && strchr (letters, *in))
+        out = mb_replace (out, *in);
+      else
+        {
+          if (inside)
+            {
+              /* The element closes at a ']' preceded by ':', '.' or '='.  */
+              if (*in == ']' && strchr (":.=", in[-1]))
+                inside = 0;
+            }
+          else if (*in == '[' && strchr (":.=", in[1]))
+            inside = 1;
+
+          *out++ = *in;
+        }
+      ++in;
+    }
+  *out = '\0';
+
+  return buf;
+}
+
+/* Compare match register RM[IDX], obtained by matching against STRING,
+   with the expectation MATCH:
+     "-"      the register must be unset (both offsets -1);
+     "@text"  the register must be an empty match located where
+              STRING continues with "text" (a bare "@" compares one
+              byte against the NUL);
+     other    the register must span exactly the text MATCH.
+   On a mismatch a diagnostic prefixed with FAIL is printed.  Returns
+   0 on agreement and 1 on mismatch.  */
+static int
+check_match (regmatch_t *rm, int idx, const char *string,
+             const char *match, const char *fail)
+{
+  const regmatch_t *reg = &rm[idx];
+
+  if (strcmp (match, "-") == 0)
+    {
+      if (reg->rm_so != -1 || reg->rm_eo != -1)
+        {
+          printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
+          return 1;
+        }
+      return 0;
+    }
+
+  if (reg->rm_so == -1 || reg->rm_eo == -1)
+    {
+      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
+      return 1;
+    }
+
+  if (match[0] == '@')
+    {
+      const char *anchor = match + 1;
+      size_t cmp_len = strlen (anchor);
+
+      if (cmp_len == 0)
+        cmp_len = 1;
+
+      if (reg->rm_so != reg->rm_eo)
+        {
+          printf ("%s rm[%d] not empty\n", fail, idx);
+          return 1;
+        }
+
+      if (strncmp (string + reg->rm_so, anchor, cmp_len) != 0)
+        {
+          printf ("%s rm[%d] not matching %s\n", fail, idx, match);
+          return 1;
+        }
+      return 0;
+    }
+
+  if ((size_t) (reg->rm_eo - reg->rm_so) == strlen (match)
+      && strncmp (string + reg->rm_so, match,
+                  reg->rm_eo - reg->rm_so) == 0)
+    return 0;
+
+  printf ("%s rm[%d] not matching %s\n", fail, idx, match);
+  return 1;
+}
+/* Run one test case.  PATTERN is compiled with CFLAGS.  EFLAGS == -1
+   means regcomp is expected to fail and STRING names the expected
+   error code without its "REG_" prefix; otherwise STRING is matched
+   with EFLAGS.  EXPECT is the expected text of register 0 (NULL: no
+   match expected) and MATCHES is an optional comma-separated list of
+   expectations for registers 1..9, each in check_match notation.
+   Diagnostics are prefixed with FAIL.  Returns 0 on success and 1 on
+   failure.  */
+static int
+test (const char *pattern, int cflags, const char *string, int eflags,
+ char *expect, char *matches, const char *fail)
+{
+ regex_t re;
+ regmatch_t rm[10];
+ int n, ret = 0;
+
+ n = regcomp (&re, pattern, cflags);
+ if (n != 0)
+ {
+ if (eflags == -1) /* a compile error was expected: check it is the right one */
+ {
+ static struct { reg_errcode_t code; const char *name; } codes []
+#define C(x) { REG_##x, #x }
+ = { C(NOERROR), C(NOMATCH), C(BADPAT), C(ECOLLATE),
+ C(ECTYPE), C(EESCAPE), C(ESUBREG), C(EBRACK),
+ C(EPAREN), C(EBRACE), C(BADBR), C(ERANGE),
+ C(ESPACE), C(BADRPT) };
+
+ int i;
+ for (i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i)
+ if (n == codes[i].code)
+ {
+ if (strcmp (string, codes[i].name)) /* STRING holds the expected code name */
+ {
+ printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
+ fail, codes[i].name, string);
+ return 1;
+ }
+ return 0;
+ }
+
+ printf ("%s regcomp return value REG_%d\n", fail, n); /* code not in the table above */
+ return 1;
+ }
+
+ char buf[500];
+ regerror (n, &re, buf, sizeof (buf));
+ printf ("%s regcomp failed: %s\n", fail, buf);
+ return 1;
+ }
+
+ if (eflags == -1) /* compile succeeded although an error was expected */
+ {
+ regfree (&re);
+
+ /* The test case file assumes something only guaranteed by the
+ rxspencer regex implementation. Namely that for empty
+ expressions regcomp() return REG_EMPTY. This is not the case
+ for us and so we ignore this error. */
+ if (strcmp (string, "EMPTY") == 0)
+ return 0;
+
+ printf ("%s regcomp unexpectedly succeeded\n", fail);
+ return 1;
+ }
+
+ if (regexec (&re, string, 10, rm, eflags)) /* nonzero: no match */
+ {
+ regfree (&re);
+ if (expect == NULL) /* and none was expected */
+ return 0;
+ printf ("%s regexec failed\n", fail);
+ return 1;
+ }
+
+ regfree (&re);
+
+ if (expect == NULL)
+ {
+ printf ("%s regexec unexpectedly succeeded\n", fail);
+ return 1;
+ }
+
+ if (cflags & REG_NOSUB) /* rm is not checked when REG_NOSUB was used */
+ return 0;
+
+ ret = check_match (rm, 0, string, expect, fail);
+ if (matches == NULL)
+ return ret;
+
+ for (n = 1; ret == 0 && n < 10; ++n) /* registers 1..9 against successive comma-separated items */
+ {
+ char *p = NULL;
+
+ if (matches)
+ {
+ p = strchr (matches, ',');
+ if (p != NULL)
+ *p = '\0'; /* temporarily cut the list at the comma */
+ }
+ ret = check_match (rm, n, string, matches ?: "-", fail); /* once the list is exhausted, registers are expected to be unset */
+ if (p)
+ {
+ *p = ','; /* restore the comma */
+ matches = p + 1;
+ }
+ else
+ matches = NULL;
+ }
+
+ return ret;
+}
+
+/* Run test() on multibyte variants of the arguments: PATTERN goes
+   through mb_frob_pattern, and STRING, EXPECT and MATCHES go through
+   mb_frob_string, all with LETTERS.  STRING is passed on unchanged
+   when EFLAGS is -1.  If any conversion fails a message prefixed with
+   FAIL is printed and 1 is returned; otherwise the result of test()
+   is returned.  All temporary strings are freed.  */
+static int
+mb_test (const char *pattern, int cflags, const char *string, int eflags,
+         char *expect, const char *matches, const char *letters,
+         const char *fail)
+{
+  char *pattern_mb;
+  const char *string_mb;
+  char *expect_mb;
+  char *matches_mb;
+  int converted;
+  int status;
+
+  pattern_mb = mb_frob_pattern (pattern, letters);
+  if (eflags == -1)
+    string_mb = string;
+  else
+    string_mb = mb_frob_string (string, letters);
+  expect_mb = mb_frob_string (expect, letters);
+  matches_mb = mb_frob_string (matches, letters);
+
+  /* A NULL result is only a failure where the input was non-NULL.  */
+  converted = (pattern_mb != NULL
+               && string_mb != NULL
+               && (expect == NULL || expect_mb != NULL)
+               && (matches == NULL || matches_mb != NULL));
+
+  if (converted)
+    status = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
+                   matches_mb, fail);
+  else
+    {
+      printf ("%s %m", fail);
+      status = 1;
+    }
+
+  free (matches_mb);
+  free (expect_mb);
+  if (string_mb != string)
+    free ((char *) string_mb);
+  free (pattern_mb);
+
+  return status;
+}
+
+/* Run mb_test for every non-empty subset of the letter pairs a/A,
+   b/B, c/C and d/D, skipping any subset containing a pair of which
+   neither letter occurs in PATTERN or STRING.  Patterns containing
+   "[:xdigit:]" or "[[=b=]]" are not tested at all.  Returns the OR of
+   all mb_test results.  */
+static int
+mb_tests (const char *pattern, int cflags, const char *string, int eflags,
+          char *expect, const char *matches)
+{
+  static const char pairs[4][2]
+    = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
+  /* fail holds "UTF-8 " + up to 8 letters + " FAIL" + NUL = 20 bytes.  */
+  char letters[9], fail[20];
+  int status = 0;
+  int mask;
+
+  /* The tests aren't supposed to work with xdigit, since a-dA-D are
+     hex digits while their accented replacements are not.  */
+  if (strstr (pattern, "[:xdigit:]") != NULL)
+    return 0;
+
+  /* XXX: regex ATM handles only single byte equivalence classes.  */
+  if (strstr (pattern, "[[=b=]]") != NULL)
+    return 0;
+
+  for (mask = 1; mask < 16; ++mask)
+    {
+      char *tail = letters;
+      int usable = 1;
+      int bit;
+
+      for (bit = 0; usable && bit < 4; ++bit)
+        {
+          char lower, upper;
+
+          if ((mask & (1 << bit)) == 0)
+            continue;
+
+          lower = pairs[bit][0];
+          upper = pairs[bit][1];
+          if (strchr (pattern, lower) == NULL
+              && strchr (string, lower) == NULL
+              && strchr (pattern, upper) == NULL
+              && strchr (string, upper) == NULL)
+            usable = 0;
+          else
+            {
+              *tail++ = lower;
+              *tail++ = upper;
+            }
+        }
+
+      if (!usable)
+        continue;
+
+      *tail = '\0';
+      sprintf (fail, "UTF-8 %s FAIL", letters);
+      status |= mb_test (pattern, cflags, string, eflags, expect, matches,
+                         letters, fail);
+    }
+
+  return status;
+}
+/* rxspencer-format test driver.  Usage: [--utf8] FILE.  Each
+   non-comment line of FILE has tab-separated fields: pattern, flags,
+   string, and optionally the expected match and a list of expected
+   subexpression matches.  A field consisting of two double quotes
+   denotes the empty string.  Every case is run in the "C" locale; with
+   --utf8, cases that pass are run again in "cs_CZ.UTF-8", both as is
+   and with multibyte letter substitutions (mb_tests).  Returns 0 if
+   everything passed and 1 otherwise.  */
+int
+main (int argc, char **argv)
+{
+ int ret = 0;
+ char *line = NULL;
+ size_t line_len = 0;
+ ssize_t len;
+ FILE *f;
+ static int test_utf8 = 0;
+ static const struct option options[] =
+ {
+ {"utf8", no_argument, &test_utf8, 1},
+ {NULL, 0, NULL, 0 }
+ };
+
+ mtrace ();
+
+ while (getopt_long (argc, argv, "", options, NULL) >= 0); /* consume all options; --utf8 sets test_utf8 */
+
+ if (optind + 1 != argc) /* exactly one non-option argument is required */
+ {
+ fprintf (stderr, "Missing test filename\n");
+ return 1;
+ }
+
+ f = fopen (argv[optind], "r");
+ if (f == NULL)
+ {
+ fprintf (stderr, "Couldn't open %s\n", argv[optind]);
+ return 1;
+ }
+
+ while ((len = getline (&line, &line_len, f)) > 0)
+ {
+ char *pattern, *flagstr, *string, *expect, *matches, *p;
+ int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;
+
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+
+ /* Skip comments and empty lines. */
+ if (*line == '#' || *line == '\0')
+ continue;
+
+ puts (line);
+ fflush (stdout);
+
+ pattern = strtok (line, "\t");
+ if (pattern == NULL)
+ continue;
+
+ if (strcmp (pattern, "\"\"") == 0) /* a field of two double quotes stands for the empty pattern */
+ pattern += 2;
+
+ flagstr = strtok (NULL, "\t");
+ if (flagstr == NULL)
+ continue;
+
+ string = strtok (NULL, "\t");
+ if (string == NULL)
+ continue;
+
+ if (strcmp (string, "\"\"") == 0) /* likewise for the empty subject string */
+ string += 2;
+
+ for (p = flagstr; *p; ++p) /* translate each flag letter */
+ switch (*p)
+ {
+ case '-':
+ break;
+ case 'b': /* basic rather than extended syntax */
+ cflags &= ~REG_EXTENDED;
+ break;
+ case '&': /* run the case a second time without REG_EXTENDED */
+ try_bre_ere = 1;
+ break;
+ case 'C': /* eflags == -1 marks "regcomp is expected to fail" for test() */
+ eflags = -1;
+ break;
+ case 'i':
+ cflags |= REG_ICASE;
+ break;
+ case 's':
+ cflags |= REG_NOSUB;
+ break;
+ case 'n':
+ cflags |= REG_NEWLINE;
+ break;
+ case '^':
+ eflags |= REG_NOTBOL;
+ break;
+ case '$':
+ eflags |= REG_NOTEOL;
+ break;
+ case 'm':
+ case 'p':
+ case '#':
+ /* Not supported. */
+ flagstr = NULL;
+ break;
+ }
+
+ if (flagstr == NULL) /* the line used an unsupported flag: skip it */
+ continue;
+
+ replace_special_chars (pattern);
+ glibc_re_syntax (pattern);
+ if (eflags != -1) /* with 'C' the string field is left untouched for test() */
+ replace_special_chars (string);
+
+ expect = strtok (NULL, "\t"); /* optional fourth field: expected overall match */
+ matches = NULL;
+ if (expect != NULL)
+ {
+ replace_special_chars (expect);
+ matches = strtok (NULL, "\t"); /* optional fifth field: expected subexpression matches */
+ if (matches != NULL)
+ replace_special_chars (matches);
+ }
+
+ if (setlocale (LC_ALL, "C") == NULL)
+ {
+ puts ("setlocale C failed");
+ ret = 1;
+ }
+ if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
+ || (try_bre_ere
+ && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
+ expect, matches, "FAIL")))
+ ret = 1;
+ else if (test_utf8) /* the UTF-8 runs happen only after the C-locale run passed */
+ {
+ if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
+ {
+ puts ("setlocale cs_CZ.UTF-8 failed");
+ ret = 1;
+ }
+ else if (test (pattern, cflags, string, eflags, expect, matches,
+ "UTF-8 FAIL")
+ || (try_bre_ere
+ && test (pattern, cflags & ~REG_EXTENDED, string,
+ eflags, expect, matches, "UTF-8 FAIL")))
+ ret = 1;
+ else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
+ || (try_bre_ere
+ && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
+ eflags, expect, matches)))
+ ret = 1;
+ }
+ }
+
+ free (line);
+ fclose (f);
+ return ret;
+}
diff --git a/testsuite/uniq.good b/testsuite/uniq.good
new file mode 100644
index 0000000..2941bec
--- /dev/null
+++ b/testsuite/uniq.good
@@ -0,0 +1,874 @@
+
+#define DPRINTF(p) /*nothing */
+#define DPRINTF(p) printf p
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define class pcre_class
+#define match_condassert 0x01 /* Called to check a condition assertion */
+#define match_isgroup 0x02 /* Set if start of bracketed group */
+#else
+#endif
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG
+#ifdef __cplusplus
+#include "internal.h"
+&& length - re->max_match_size > start_offset)
+((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(i.e. keep it out of the loop). Also we can test that there are at least
+(md->ctypes[*eptr++] & ctype_digit) != 0)
+(md->ctypes[*eptr++] & ctype_digit) == 0)
+(md->ctypes[*eptr++] & ctype_space) != 0)
+(md->ctypes[*eptr++] & ctype_space) == 0)
+(md->ctypes[*eptr++] & ctype_word) != 0)
+(md->ctypes[*eptr++] & ctype_word) == 0)
+(offsetcount - 2) * sizeof (int));
+(offsets == NULL && offsetcount > 0))
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (save);
+(re->tables + fcc_offset)[req_char] : req_char;
+* Match a back-reference *
+* Execute a Regular Expression *
+* Match from current position *
+* Debugging function to print chars *
+* Perl-Compatible Regular Expressions *
+* Macros and tables for character handling *
+*************************************************/
+*/
+*iptr = -1;
+*iptr++ = -1;
+*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
+*prev == OP_ONCE)
+-----------------------------------------------------------------------------
+-1 => failed to match
+/*
+/* "Once" brackets are like assertion brackets except that after a match,
+/* ... else fall through */
+/* Advance to a possible match for an initial string after study */
+/* Allow compilation as C++ source code, should anybody want to do that. */
+/* Always fail if not enough characters left */
+/* An alternation is the end of a branch; scan along to find the end of the
+/* Assert before internal newline if multiline, or before a terminating
+/* Assertion brackets. Check the alternative branches in turn - the
+/* At the start of a bracketed group, add the current subject pointer to the
+/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
+/* Caseful comparisons */
+/* Change option settings */
+/* Common code for all repeated single character type matches */
+/* Common code for all repeated single-character matches. We can give
+/* Compute the minimum number of offsets that we need to reset each time. Doing
+/* Conditional group: compilation checked that there are no more than
+/* Continue as from after the assertion, updating the offsets high water
+/* Continue from after the assertion, updating the offsets high water
+/* Control never gets here */
+/* Control never reaches here */
+/* Copy the offset information from temporary store if necessary */
+/* Do a single test if no case difference is set up */
+/* Do not stick any code in here without much thought; it is assumed
+/* End of a group, repeated or non-repeating. If we are at the end of
+/* End of subject assertion (\z) */
+/* End of subject or ending \n assertion (\Z) */
+/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
+/* First, ensure the minimum number of matches are present. */
+/* First, ensure the minimum number of matches are present. Use inline
+/* First, ensure the minimum number of matches are present. We get back
+/* Flag bits for the match() function */
+/* For a non-repeating ket, just continue at this level. This also
+/* For anchored or unanchored matches, there may be a "last known required
+/* For extended extraction brackets (large number), we have to fish out
+/* For extended extraction brackets (large number), we have to fish out the
+/* For matches anchored to the end of the pattern, we can often avoid
+/* If a back reference hasn't been set, the length that is passed is greater
+/* If checking an assertion for a condition, return TRUE. */
+/* If hit the end of the group (which could be repeated), fail */
+/* If max == min we can continue with the main loop without the
+/* If maximizing it is worth using inline code for speed, doing the type
+/* If maximizing, find the longest possible run, then work backwards. */
+/* If maximizing, find the longest string and work backwards */
+/* If min = max, continue at the same level without recursing */
+/* If min = max, continue at the same level without recursion.
+/* If minimizing, keep testing the rest of the expression and advancing
+/* If minimizing, keep trying and advancing the pointer */
+/* If minimizing, we have to test the rest of the pattern before each
+/* If req_char is set, we know that that character must appear in the subject
+/* If the expression has got more back references than the offsets supplied can
+/* If the length of the reference is zero, just continue with the
+/* If the reference is unset, set the length to be longer than the amount
+/* If we can't find the required character, break the matching loop */
+/* If we have found the required character, save the point where we
+/* In all other cases except a conditional group we have to check the
+/* In case the recursion has set more capturing values, save the final
+/* Include the internals header, which itself includes Standard C headers plus
+/* Insufficient room for saving captured contents */
+/* Loop for handling unanchored repeated matching attempts; for anchored regexs
+/* Match a back reference, possibly repeatedly. Look past the end of the
+/* Match a character class, possibly repeatedly. Look past the end of the
+/* Match a negated single character */
+/* Match a negated single character repeatedly. This is almost a repeat of
+/* Match a run of characters */
+/* Match a single character repeatedly; different opcodes share code. */
+/* Match a single character type repeatedly; several different opcodes
+/* Match a single character type; inline for speed */
+/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+/* Move the subject pointer back. This occurs only at the start of
+/* Negative assertion: all branches must fail to match */
+/* Now start processing the operations. */
+/* OP_KETRMAX */
+/* On entry ecode points to the first opcode, and eptr to the first character
+/* Opening capturing bracket. If there is space in the offset vector, save
+/* Or to a non-unique first char after study */
+/* Or to a unique first char if possible */
+/* Or to just after \n for a multiline match if possible */
+/* Other types of node can be handled by a switch */
+/* Otherwise test for either case */
+/* Print a sequence of chars in printable format, stopping at the end of the
+/* Recursion matches the current regex, nested. If there are any capturing
+/* Reset the maximum number of extractions we might see. */
+/* Reset the value of the ims flags, in case they got changed during
+/* Reset the working variable associated with each extraction. These should
+/* Separate the caselesss case for speed */
+/* Set up for repetition, or handle the non-repeated case */
+/* Set up the first character to match, if available. The first_char value is
+/* Skip over conditional reference data or large extraction number data if
+/* Start of subject assertion */
+/* Start of subject unless notbol, or after internal newline if multiline */
+/* Structure for building a chain of data that actually lives on the
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The condition is an assertion. Call match() to evaluate it - setting
+/* The ims options can vary during the matching as a result of the presence
+/* The repeating kets try the rest of the pattern or restart from the
+/* There's been some horrible disaster. */
+/* This "while" is the end of the "do" above */
+/* This function applies a compiled re to a subject string and picks out
+/* Use a macro for debugging printing, 'cause that limits the use of #ifdef
+/* We don't need to repeat the search if we haven't yet reached the
+/* When a match occurs, substrings will be set for all internal extractions;
+/* Word boundary assertions */
+/*************************************************
+1. This software is distributed in the hope that it will be useful,
+2. The origin of this software must not be misrepresented, either by
+3. Altered versions must be plainly marked as such, and must not be
+4. If PCRE is embedded in any software that is released under the GNU
+5.005. If there is an options reset, it will get obeyed in the normal
+6 : 3 + (ecode[1] << 8) + ecode[2]),
+< -1 => some kind of unexpected problem
+= 0 => success, but offsets is not big enough
+Arguments:
+BOOL anchored;
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL is_subject;
+BOOL minimize = FALSE;
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL rc;
+BOOL startline;
+BOOL using_temporary_offsets = FALSE;
+Copyright (c) 1997-2000 University of Cambridge
+DPRINTF ((">>>> returning %d\n", match_block.errorcode));
+DPRINTF ((">>>> returning %d\n", rc));
+DPRINTF (("Copied offsets from temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Got memory to hold back references\n"));
+DPRINTF (("Unknown opcode %d\n", *ecode));
+DPRINTF (("bracket %d failed\n", number));
+DPRINTF (("bracket 0 failed\n"));
+DPRINTF (("ims reset to %02lx\n", ims));
+DPRINTF (("ims set to %02lx at group repeat\n", ims));
+DPRINTF (("ims set to %02lx\n", ims));
+DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+DPRINTF (("start bracket 0\n"));
+GETCHAR (c, eptr) /* Get character */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+General Purpose Licence (GPL), then the terms of that licence shall
+However, if the referenced string is the empty string, always treat
+If the bracket fails to match, we need to restore this value and also the
+If there isn't enough space in the offset vector, treat this as if it were a
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+Otherwise, we can use the vector supplied, rounding down its size to a multiple
+Permission is granted to anyone to use this software for any purpose on any
+REPEATCHAR:
+REPEATNOTCHAR:
+REPEATTYPE:
+Returns: > 0 => success; value is the number of elements filled in
+Returns: TRUE if matched
+Returns: TRUE if matched
+Returns: nothing
+They are not both allowed to be zero. */
+This is a library of functions to support regular expressions whose syntax
+This is the forcible breaking of infinite loops as implemented in Perl
+Writing separate code makes it go faster, as does using an autoincrement and
+Written by: Philip Hazel <ph10@cam.ac.uk>
+a move back into the brackets. Check the alternative branches in turn - the
+address of eptr, so that eptr can be a register variable. */
+an assertion "group", stop matching and return TRUE, but record the
+an empty string - recursion will then try other alternatives, if any. */
+an error. Save the top 15 values on the stack, and accept that the rest
+an unanchored pattern, of course. If there's no first char and the pattern was
+analyzing most of the pattern. length > re->max_match_size is
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+and advance one byte in the pattern code. */
+and reinstate them after the recursion. However, we don't know how many
+and semantics are as close as possible to those of the Perl 5 language. See
+and the required character in fact is caseful. */
+at run time, so we have to test for anchoring. The first char may be unset for
+avoid duplicate testing (which takes significant time). This covers the vast
+backing off on a match. */
+bmtable = extra->data.bmtable;
+both cases of the character. Otherwise set the two values the same, which will
+bracketed group and go to there. */
+brackets - for testing for empty matches
+brackets started but not finished, we have to save their starting points
+break;
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+c != md->lcc[*eptr++])
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_NOTSTAR;
+c = *ecode++ - OP_STAR;
+c = *ecode++ - OP_TYPESTAR;
+c = *ecode++;
+c = *eptr++;
+c = 15;
+c = max - min;
+c = md->end_subject - eptr;
+c = md->lcc[c];
+c = md->offset_max;
+c == md->lcc[*eptr++])
+can't just fail here, because of the possibility of quantifiers with zero
+case OP_ALT:
+case OP_ANY:
+case OP_ASSERT:
+case OP_ASSERTBACK:
+case OP_ASSERTBACK_NOT:
+case OP_ASSERT_NOT:
+case OP_BEG_WORD:
+case OP_BRA: /* Non-capturing bracket: optimized */
+case OP_BRAMINZERO:
+case OP_BRANUMBER:
+case OP_BRAZERO:
+case OP_CHARS:
+case OP_CIRC:
+case OP_CLASS:
+case OP_COND:
+case OP_CREF:
+case OP_CRMINPLUS:
+case OP_CRMINQUERY:
+case OP_CRMINRANGE:
+case OP_CRMINSTAR:
+case OP_CRPLUS:
+case OP_CRQUERY:
+case OP_CRRANGE:
+case OP_CRSTAR:
+case OP_DIGIT:
+case OP_DOLL:
+case OP_END:
+case OP_END_WORD:
+case OP_EOD:
+case OP_EODN:
+case OP_EXACT:
+case OP_KET:
+case OP_KETRMAX:
+case OP_KETRMIN:
+case OP_MINPLUS:
+case OP_MINQUERY:
+case OP_MINSTAR:
+case OP_MINUPTO:
+case OP_NOT:
+case OP_NOTEXACT:
+case OP_NOTMINPLUS:
+case OP_NOTMINQUERY:
+case OP_NOTMINSTAR:
+case OP_NOTMINUPTO:
+case OP_NOTPLUS:
+case OP_NOTQUERY:
+case OP_NOTSTAR:
+case OP_NOTUPTO:
+case OP_NOT_DIGIT:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORD_BOUNDARY:
+case OP_ONCE:
+case OP_OPT:
+case OP_PLUS:
+case OP_QUERY:
+case OP_RECURSE:
+case OP_REF:
+case OP_REVERSE:
+case OP_SOD:
+case OP_STAR:
+case OP_TYPEEXACT:
+case OP_TYPEMINPLUS:
+case OP_TYPEMINQUERY:
+case OP_TYPEMINSTAR:
+case OP_TYPEMINUPTO:
+case OP_TYPEPLUS:
+case OP_TYPEQUERY:
+case OP_TYPESTAR:
+case OP_TYPEUPTO:
+case OP_UPTO:
+case OP_WHITESPACE:
+case OP_WORDCHAR:
+case OP_WORD_BOUNDARY:
+case matching may be when this character is hit, so test for it in both its
+caselessly, or if there are any changes of this flag within the regex, set up
+cases if necessary. However, the different cased versions will not be set up
+character" set. If the PCRE_CASELESS is set, implying that the match starts
+characters and work backwards. */
+code for maximizing the speed, and do the type test once at the start
+code to character type repeats - written out again for speed. */
+commoning these up that doesn't require a test of the positive/negative
+computer system, and to redistribute it freely, subject to the following
+const char *subject;
+const pcre *re;
+const pcre_extra *extra;
+const uschar *bmtable = NULL;
+const uschar *data = ecode + 1; /* Save for matching */
+const uschar *end_subject;
+const uschar *next = ecode + 1;
+const uschar *p = md->start_subject + md->offset_vector[offset];
+const uschar *p;
+const uschar *pp = eptr;
+const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
+const uschar *prev = ecode;
+const uschar *req_char_ptr = start_match - 1;
+const uschar *saved_eptr = eptr;
+const uschar *saved_eptr = eptrb->saved_eptr;
+const uschar *saved_eptr;
+const uschar *start_bits = NULL;
+const uschar *start_match = (const uschar *) subject + start_offset;
+continue; /* With the main loop */
+continue;
+course of events. */
+ctype = *ecode++; /* Code for the character type */
+cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+current high water mark for use by positive assertions. Do this also
+default: /* No repeat follows */
+default:
+do
+each branch of a lookbehind assertion. If we are too close to the start to
+each substring: the offsets to the start and end of the substring.
+ecode position in code
+ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += 2;
+ecode += 3 + (ecode[4] << 8) + ecode[5];
+ecode += 33; /* Advance past the item */
+ecode += 3; /* Advance past the item */
+ecode += 3;
+ecode += 5;
+ecode = next + 3;
+ecode++;
+else
+else if ((extra->options & PCRE_STUDY_BM) != 0)
+else if (first_char >= 0)
+else if (start_bits != NULL)
+else if (startline)
+encountered */
+end_subject = match_block.end_subject;
+eptr pointer in subject
+eptr points into the subject
+eptr += c;
+eptr += length;
+eptr += min;
+eptr -= (ecode[1] << 8) + ecode[2];
+eptr -= length;
+eptr = md->end_match_ptr;
+eptr++;
+eptrb pointer to chain of blocks containing eptr at start of
+eptrb = &newptrb;
+eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
+eptrblock *eptrb;
+eptrblock newptrb;
+eptrblock;
+exactly what going to the ket would do. */
+explicit claim or by omission.
+external_extra points to "hints" from pcre_study() or is NULL
+external_re points to the compiled expression
+extraction by setting the offsets and bumping the high water mark. */
+first_char = match_block.lcc[first_char];
+first_char = re->first_char;
+flags can contain
+for (;;)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= min; i++)
+for (i = min; i < max; i++)
+for (i = min;; i++)
+for the "once" (not-backup up) groups. */
+for the match to succeed. If the first character is set, req_char must be
+found it, so that we don't search again next time round the loop if
+from a previous iteration of this group, and be referred to by a reference
+goto REPEATCHAR;
+goto REPEATNOTCHAR;
+goto REPEATTYPE;
+group number back at the start and if necessary complete handling an
+happens for a repeating ket if no characters were matched in the group.
+here; that is handled in the code for KET. */
+hold, we get a temporary bit of working store to use during the matching.
+i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
+if (!anchored)
+if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!md->endonly)
+if (!rc)
+if (!startline && extra != NULL)
+if ((*ecode++ == OP_WORD_BOUNDARY) ?
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) == 0)
+if ((extra->options & PCRE_STUDY_MAPPED) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_isgroup) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_DOTALL) == 0 && c == '\n')
+if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) == 0)
+if ((md->ctypes[*eptr++] & ctype_space) != 0)
+if ((md->ctypes[*eptr++] & ctype_space) == 0)
+if ((md->ctypes[*eptr++] & ctype_word) != 0)
+if ((md->ctypes[*eptr++] & ctype_word) == 0)
+if ((md->ctypes[c] & ctype_digit) != 0)
+if ((md->ctypes[c] & ctype_digit) == 0)
+if ((md->ctypes[c] & ctype_space) != 0)
+if ((md->ctypes[c] & ctype_space) == 0)
+if ((md->ctypes[c] & ctype_word) != 0)
+if ((md->ctypes[c] & ctype_word) == 0)
+if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
+if ((re->options & PCRE_FIRSTSET) != 0)
+if ((re->options & PCRE_REQCHSET) != 0)
+if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
+if (*ecode != OP_ONCE && *ecode != OP_ALT)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET)
+if (*ecode == OP_KETRMIN)
+if (*ecode++ != *eptr++)
+if (*ecode++ == *eptr++)
+if (*eptr != '\n')
+if (*eptr++ == '\n')
+if (*p++ != *eptr++)
+if (*p++ == req_char)
+if (*prev != OP_COND)
+if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
+if (bmtable != NULL)
+if (bmtable[*start_match])
+if (c != *eptr++)
+if (c != md->lcc[*eptr++])
+if (c < 16)
+if (c == *eptr++)
+if (c == md->lcc[*eptr++])
+if (c > md->end_subject - eptr)
+if (cur_is_word == prev_is_word ||
+if (ecode[3] == OP_CREF) /* Condition is extraction test */
+if (ecode[3] == OP_OPT)
+if (eptr != md->start_subject && eptr[-1] != '\n')
+if (eptr != md->start_subject)
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject)
+if (eptr < md->start_subject)
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+if (eptr >= md->end_subject || *eptr == '\n')
+if (eptr >= md->end_subject || c != *eptr)
+if (eptr >= md->end_subject || c != md->lcc[*eptr])
+if (eptr >= md->end_subject || c == *eptr)
+if (eptr >= md->end_subject || c == md->lcc[*eptr])
+if (eptr >= md->end_subject)
+if (eptr++ >= md->end_subject)
+if (i >= max || !match_ref (offset, eptr, length, md, ims))
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject || c != *eptr++)
+if (i >= max || eptr >= md->end_subject || c == *eptr++)
+if (i >= max || eptr >= md->end_subject)
+if (is_subject && length > md->end_subject - p)
+if (isprint (c = *(p++)))
+if (length == 0)
+if (length > md->end_subject - eptr)
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match_block.end_offset_top > offsetcount)
+if (match_block.offset_vector != NULL)
+if (match_block.offset_vector == NULL)
+if (max == 0)
+if (md->lcc[*ecode++] != md->lcc[*eptr++])
+if (md->lcc[*ecode++] == md->lcc[*eptr++])
+if (md->lcc[*p++] != md->lcc[*eptr++])
+if (md->notbol && eptr == md->start_subject)
+if (md->notempty && eptr == md->start_match)
+if (md->noteol)
+if (min == max)
+if (min > 0)
+if (min > md->end_subject - eptr)
+if (minimize)
+if (number > 0)
+if (number > EXTRACT_BASIC_MAX)
+if (offset < md->offset_max)
+if (offset >= md->offset_max)
+if (offset_top <= offset)
+if (offsetcount < 2)
+if (offsetcount >= 4)
+if (op > OP_BRA)
+if (p > req_char_ptr)
+if (p >= end_subject)
+if (pp == req_char || pp == req_char2)
+if (re == NULL || subject == NULL ||
+if (re->magic_number != MAGIC_NUMBER)
+if (re->max_match_size >= 0
+if (re->top_backref > 0 && re->top_backref >= ocount / 3)
+if (req_char == req_char2)
+if (req_char >= 0)
+if (resetcount > offsetcount)
+if (save != stacksave)
+if (save == NULL)
+if (skipped_chars)
+if (start_match + bmtable[256] > end_subject)
+if (start_match > match_block.start_subject + start_offset)
+if (using_temporary_offsets)
+if certain parts of the pattern were not used. */
+if the malloc fails ... there is no way of returning to the top level with
+implied in the second condition, because start_offset > 0. */
+ims current /i, /m, and /s options
+ims the ims flags
+ims = (ims & ~PCRE_IMS) | ecode[4];
+ims = ecode[1];
+ims = original_ims;
+ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
+in the pattern. */
+in the subject string, while eptrb holds the value of eptr at the start of the
+initialize them to avoid reading uninitialized locations. */
+inline, and there are *still* stupid compilers about that don't like indented
+inside the group.
+int
+int *offsets;
+int *save;
+int c;
+int first_char = -1;
+int flags;
+int length;
+int min, max, ctype;
+int number = *prev - OP_BRA;
+int number = op - OP_BRA;
+int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
+int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
+int offset;
+int offset_top;
+int offsetcount;
+int op = (int) *ecode;
+int options;
+int rc;
+int req_char = -1;
+int req_char2 = -1;
+int resetcount, ocount;
+int save_offset1 = md->offset_vector[offset];
+int save_offset2 = md->offset_vector[offset + 1];
+int save_offset3 = md->offset_vector[md->offset_end - number];
+int skipped_chars = 0;
+int stacksave[15];
+int start_offset;
+is a bit large to put on the stack, but using malloc for small numbers
+is_subject TRUE if printing from within md->start_subject
+it as matched, any number of times (otherwise there could be infinite
+item to see if there is repeat information following. The code is similar
+item to see if there is repeat information following. Then obey similar
+last bracketed group - used for breaking infinite loops matching zero-length
+later in the subject; otherwise the test starts at the match point. This
+length length of subject string (may contain binary zeros)
+length length to be matched
+length number to print
+length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
+length = md->end_subject - p;
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+loop. */
+loops). */
+main loop. */
+majority of cases. It will be suboptimal when the case flag changes in a regex
+mark, since extracts may have been taken during the assertion. */
+mark, since extracts may have been taken. */
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode, offset_top, md, ims, eptrb, flags)
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match_block.ctypes = re->tables + ctypes_offset;
+match_block.end_subject = match_block.start_subject + length;
+match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
+match_block.errorcode == PCRE_ERROR_NOMATCH &&
+match_block.lcc = re->tables + lcc_offset;
+match_block.lcc[*start_match] != first_char)
+match_block.notbol = (options & PCRE_NOTBOL) != 0;
+match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
+match_block.noteol = (options & PCRE_NOTEOL) != 0;
+match_block.offset_end = ocount;
+match_block.offset_max = (2 * ocount) / 3;
+match_block.offset_overflow = FALSE;
+match_block.offset_overflow = TRUE;
+match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
+match_block.offset_vector = offsets;
+match_block.start_match = start_match;
+match_block.start_pattern = re->code;
+match_block.start_subject = (const uschar *) subject;
+match_condassert - this is an assertion condition
+match_condassert | match_isgroup))
+match_data *md;
+match_data match_block;
+match_isgroup - this is the start of a bracketed group
+match_isgroup);
+match_ref (offset, eptr, length, md, ims)
+matches, we carry on as at the end of a normal bracket, leaving the subject
+matching won't pass the KET for an assertion. If any one branch matches,
+matching won't pass the KET for this kind of subpattern. If any one branch
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[3] << 8) + ecode[4];
+max = INT_MAX;
+max = rep_max[c]; /* zero for max => infinity */
+max, eptr));
+maximum. Alternatively, if maximizing, find the maximum number of
+may be wrong. */
+md pointer to "static" info for the match
+md pointer to matching data block, if is_subject is TRUE
+md points to match data block
+md->end_match_ptr = eptr; /* For ONCE */
+md->end_match_ptr = eptr; /* Record where we ended */
+md->end_offset_top = offset_top; /* and how many extracts were taken */
+md->end_offset_top = offset_top;
+md->end_subject - eptr + 1 :
+md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
+md->offset_overflow = TRUE;
+md->offset_vector[md->offset_end - i] = save[i];
+md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
+md->offset_vector[md->offset_end - number] = save_offset3;
+md->offset_vector[md->offset_end - number];
+md->offset_vector[offset + 1] - md->offset_vector[offset];
+md->offset_vector[offset + 1] = eptr - md->start_subject;
+md->offset_vector[offset + 1] = save_offset2;
+md->offset_vector[offset] =
+md->offset_vector[offset] = save_offset1;
+memcpy (offsets + 2, match_block.offset_vector + 2,
+min = (ecode[1] << 8) + ecode[2];
+min = 0;
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = 1;
+min = rep_min[c]; /* Pick up values from tables; */
+minima. */
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (c & 1) != 0;
+minimize = *ecode == OP_MINUPTO;
+minimize = *ecode == OP_NOTMINUPTO;
+minimize = *ecode == OP_TYPEMINUPTO;
+minimize = TRUE;
+minimum number of matches are present. If min = max, continue at the same
+misrepresented as being the original software.
+move back, this match function fails. */
+mustn't change the current values of the data slot, because they may be set
+need to recurse. */
+never be used unless previously set, but they get saved and restored, and so we
+never set for an anchored regular expression, but the anchoring may be forced
+newline unless endonly is set, else end of subject unless noteol is set. */
+newptrb.prev = eptrb;
+newptrb.saved_eptr = eptr;
+next += (next[1] << 8) + next[2];
+non-capturing bracket. Don't worry about setting the flag for the error case
+number = (ecode[4] << 8) | ecode[5];
+number = (prev[4] << 8) | prev[5];
+number from a dummy opcode at the start. */
+number, then move along the subject till after the recursive match,
+ocount = offsetcount - (offsetcount % 3);
+ocount = re->top_backref * 3 + 3;
+of (?ims) items in the pattern. They are kept in a local variable so that
+of 3. */
+of subject left; this ensures that every attempt at a match fails. We
+offset index into the offset vector
+offset = number << 1;
+offset_top current top pointer
+offset_top = md->end_offset_top;
+offset_top = offset + 2;
+offset_top, md, ims, eptrb, match_isgroup);
+offsetcount the number of elements in the vector
+offsets points to a vector of ints to be filled in with offsets
+offsets[0] = start_match - match_block.start_subject;
+offsets[1] = match_block.end_match_ptr - match_block.start_subject;
+op = OP_BRA;
+opcode. */
+optimization can save a huge amount of backtracking in patterns with nested
+option for each character match. Maybe that wouldn't add very much to the
+options option bits
+p points to characters
+p--;
+past the end if there is only one branch, but that's OK because that is
+pchars (ecode, length, FALSE, md);
+pchars (eptr, 16, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (p, length, FALSE, md);
+pchars (p, length, is_subject, md)
+pchars (start_match, end_subject - start_match, TRUE, &match_block);
+pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
+place we found it at last time. */
+pointer. */
+portions of the string if it matches. Two elements in the vector are set for
+pre-processor statements. I suppose it's only been 10 years... */
+preceded by BRAZERO or BRAMINZERO. */
+preceding bracket, in the appropriate order. */
+preceding bracket, in the appropriate order. We need to reset any options
+printf (" against backref ");
+printf (" against pattern ");
+printf ("%c", c);
+printf (">>>> Match against: ");
+printf (">>>>> Skipped %d chars to reach first character\n",
+printf ("\\x%02x", c);
+printf ("\n");
+printf ("end bracket %d", number);
+printf ("matching subject ");
+printf ("matching subject <null> against pattern ");
+printf ("matching subject <null>");
+printf ("start bracket %d subject=", number);
+rc = 0;
+rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
+rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
+register const uschar *ecode;
+register const uschar *eptr;
+register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
+register int *iend = iptr + resetcount;
+register int *iend = iptr - resetcount / 2 + 1;
+register int *iptr = match_block.offset_vector + ocount;
+register int *iptr = match_block.offset_vector;
+register int c = *start_match;
+register int c;
+register int i;
+register int length = ecode[1];
+register int pp = *p++;
+repeat it in the interests of efficiency. */
+repeat limits are compiled as a number of copies, with the optional ones
+req_char = re->req_char;
+req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
+req_char_ptr = p;
+resetcount = 2 + re->top_bracket * 2;
+resetcount = ocount;
+restoring at the exit of a group is easy. */
+restrictions:
+return FALSE;
+return PCRE_ERROR_BADMAGIC;
+return PCRE_ERROR_BADOPTION;
+return PCRE_ERROR_NOMATCH;
+return PCRE_ERROR_NOMEMORY;
+return PCRE_ERROR_NULL;
+return TRUE;
+return match (eptr,
+return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
+return match_block.errorcode;
+return rc;
+save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
+save = stacksave;
+save[i] = md->offset_vector[md->offset_end - i];
+seems expensive. As a compromise, the stack is used when there are fewer
+share code. This is very similar to the code for single characters, but we
+similar code to character type repeats - written out again for speed.
+since matching characters is likely to be quite common. First, ensure the
+skipped_chars += bmtable[*start_match],
+skipped_chars += bmtable[256] - 1;
+skipped_chars -= bmtable[256] - 1;
+skipped_chars);
+skipped_chars++,
+stack of such pointers, to be re-instated at the end of the group when we hit
+stack, for holding the values of the subject pointer at the start of each
+start of each branch to move the current point backwards, so the code at
+start_bits = extra->data.start_bits;
+start_match += bmtable[*start_match];
+start_match += bmtable[256] - 1;
+start_match -= bmtable[256] - 1;
+start_match = (const uschar *) subject + length - re->max_match_size;
+start_match++ < end_subject);
+start_match++;
+start_offset where to start in the subject string
+startline = (re->options & PCRE_STARTLINE) != 0;
+static BOOL
+static const char rep_max[] =
+static const char rep_min[] =
+static void
+strings.
+struct eptrblock *prev;
+studied, there may be a bitmap of possible first characters. */
+subject points to the subject string
+subject if the requested.
+subpattern - to break infinite loops. */
+subpattern, so as to detect when an empty string has been matched by a
+subsequent match. */
+such there are (offset_top records the completed total) so we just have
+supersede any condition above with which it is incompatible.
+switch (*ecode)
+switch (ctype)
+switch (op)
+test once at the start (i.e. keep it out of the loop). */
+than 16 values to store; otherwise malloc is used. A problem is what to do
+than the number of characters left in the string, so the match fails.
+that "continue" in the code above comes out to here to repeat the main
+that changed within the bracket before re-running it, so check the next
+that it may occur zero times. It may repeat infinitely, or not at all -
+the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
+the closing ket. When match() is called in other circumstances, we don't add to
+the code for a repeated single character, but I haven't found a nice way of
+the current subject position in the working slot at the top of the vector. We
+the expression and advancing one matching character if failing, up to the
+the external pcre header. */
+the file Tech.Notes for some information on the internals.
+the final argument TRUE causes it to stop at the end of an assertion. */
+the group. */
+the length of the reference string explicitly rather than passing the
+the loop runs just once. */
+the minimum number of bytes before we start. */
+the number from a dummy opcode at the start. */
+the point in the subject string is not moved back. Thus there can never be
+the pointer while it matches the class. */
+the same bracket.
+the stack. */
+the start hasn't passed this character yet. */
+the subject. */
+there were too many extractions, set the return code to zero. In the case
+this level is identical to the lookahead case. */
+this makes a huge difference to execution time when there aren't many brackets
+those back references that we can. In this case there need not be overflow
+time taken, but character matching *is* what this is all about... */
+to save all the potential data. There may be up to 99 such values, which
+to that for character classes, but repeated for efficiency. Then obey
+two branches. If the condition is false, skipping the first branch takes us
+typedef struct eptrblock
+unless PCRE_CASELESS was given or the casing state changes within the regex.
+unlimited repeats that aren't going to match. We don't know what the state of
+unsigned long int ims = 0;
+unsigned long int ims;
+unsigned long int original_ims = ims; /* Save for resetting on ')' */
+up quickly if there are fewer than the minimum number of characters left in
+using_temporary_offsets = TRUE;
+values of the final offsets, in case they were set by a previous iteration of
+we just need to set up the whole thing as substring 0 before returning. If
+where we had to get some local store to hold offsets for backreferences, copy
+while (!anchored &&
+while (*ecode == OP_ALT)
+while (*ecode == OP_ALT);
+while (*next == OP_ALT);
+while (--iptr >= iend)
+while (eptr >= pp)
+while (iptr < iend)
+while (length-- > 0)
+while (p < end_subject)
+while (start_match < end_subject &&
+while (start_match < end_subject && *start_match != first_char)
+while (start_match < end_subject && start_match[-1] != '\n')
+while (start_match < end_subject)
+{
+{0, 0, 0, 0, 1, 1};
+{0, 0, 1, 1, 0, 0};
+} /* End of main loop */
+}
diff --git a/testsuite/uniq.inp b/testsuite/uniq.inp
new file mode 100644
index 0000000..b1eddf3
--- /dev/null
+++ b/testsuite/uniq.inp
@@ -0,0 +1,2058 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#define DPRINTF(p) /*nothing */
+#define DPRINTF(p) printf p
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define class pcre_class
+#define match_condassert 0x01 /* Called to check a condition assertion */
+#define match_isgroup 0x02 /* Set if start of bracketed group */
+#else
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef __cplusplus
+#include "internal.h"
+&& length - re->max_match_size > start_offset)
+((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(i.e. keep it out of the loop). Also we can test that there are at least
+(md->ctypes[*eptr++] & ctype_digit) != 0)
+(md->ctypes[*eptr++] & ctype_digit) == 0)
+(md->ctypes[*eptr++] & ctype_space) != 0)
+(md->ctypes[*eptr++] & ctype_space) == 0)
+(md->ctypes[*eptr++] & ctype_word) != 0)
+(md->ctypes[*eptr++] & ctype_word) == 0)
+(offsetcount - 2) * sizeof (int));
+(offsets == NULL && offsetcount > 0))
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (save);
+(re->tables + fcc_offset)[req_char] : req_char;
+* Match a back-reference *
+* Execute a Regular Expression *
+* Match from current position *
+* Debugging function to print chars *
+* Perl-Compatible Regular Expressions *
+* Macros and tables for character handling *
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*/
+*/
+*/
+*/
+*/
+*iptr = -1;
+*iptr++ = -1;
+*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
+*prev == OP_ONCE)
+-----------------------------------------------------------------------------
+-----------------------------------------------------------------------------
+-1 => failed to match
+/*
+/* "Once" brackets are like assertion brackets except that after a match,
+/* ... else fall through */
+/* ... else fall through */
+/* Advance to a possible match for an initial string after study */
+/* Allow compilation as C++ source code, should anybody want to do that. */
+/* Always fail if not enough characters left */
+/* An alternation is the end of a branch; scan along to find the end of the
+/* Assert before internal newline if multiline, or before a terminating
+/* Assertion brackets. Check the alternative branches in turn - the
+/* At the start of a bracketed group, add the current subject pointer to the
+/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
+/* Caseful comparisons */
+/* Caseful comparisons */
+/* Change option settings */
+/* Common code for all repeated single character type matches */
+/* Common code for all repeated single-character matches. We can give
+/* Common code for all repeated single-character matches. We can give
+/* Compute the minimum number of offsets that we need to reset each time. Doing
+/* Conditional group: compilation checked that there are no more than
+/* Continue as from after the assertion, updating the offsets high water
+/* Continue from after the assertion, updating the offsets high water
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never reaches here */
+/* Control never reaches here */
+/* Copy the offset information from temporary store if necessary */
+/* Do a single test if no case difference is set up */
+/* Do not stick any code in here without much thought; it is assumed
+/* End of a group, repeated or non-repeating. If we are at the end of
+/* End of subject assertion (\z) */
+/* End of subject or ending \n assertion (\Z) */
+/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
+/* First, ensure the minimum number of matches are present. */
+/* First, ensure the minimum number of matches are present. Use inline
+/* First, ensure the minimum number of matches are present. We get back
+/* Flag bits for the match() function */
+/* For a non-repeating ket, just continue at this level. This also
+/* For a non-repeating ket, just continue at this level. This also
+/* For anchored or unanchored matches, there may be a "last known required
+/* For extended extraction brackets (large number), we have to fish out
+/* For extended extraction brackets (large number), we have to fish out the
+/* For matches anchored to the end of the pattern, we can often avoid
+/* If a back reference hasn't been set, the length that is passed is greater
+/* If checking an assertion for a condition, return TRUE. */
+/* If hit the end of the group (which could be repeated), fail */
+/* If max == min we can continue with the main loop without the
+/* If maximizing it is worth using inline code for speed, doing the type
+/* If maximizing, find the longest possible run, then work backwards. */
+/* If maximizing, find the longest string and work backwards */
+/* If min = max, continue at the same level without recursing */
+/* If min = max, continue at the same level without recursion.
+/* If minimizing, keep testing the rest of the expression and advancing
+/* If minimizing, keep trying and advancing the pointer */
+/* If minimizing, we have to test the rest of the pattern before each
+/* If req_char is set, we know that that character must appear in the subject
+/* If the expression has got more back references than the offsets supplied can
+/* If the length of the reference is zero, just continue with the
+/* If the reference is unset, set the length to be longer than the amount
+/* If we can't find the required character, break the matching loop */
+/* If we have found the required character, save the point where we
+/* In all other cases except a conditional group we have to check the
+/* In case the recursion has set more capturing values, save the final
+/* Include the internals header, which itself includes Standard C headers plus
+/* Insufficient room for saving captured contents */
+/* Loop for handling unanchored repeated matching attempts; for anchored regexs
+/* Match a back reference, possibly repeatedly. Look past the end of the
+/* Match a character class, possibly repeatedly. Look past the end of the
+/* Match a negated single character */
+/* Match a negated single character repeatedly. This is almost a repeat of
+/* Match a run of characters */
+/* Match a single character repeatedly; different opcodes share code. */
+/* Match a single character type repeatedly; several different opcodes
+/* Match a single character type; inline for speed */
+/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+/* Move the subject pointer back. This occurs only at the start of
+/* Negative assertion: all branches must fail to match */
+/* Now start processing the operations. */
+/* OP_KETRMAX */
+/* OP_KETRMAX */
+/* On entry ecode points to the first opcode, and eptr to the first character
+/* Opening capturing bracket. If there is space in the offset vector, save
+/* Or to a non-unique first char after study */
+/* Or to a unique first char if possible */
+/* Or to just after \n for a multiline match if possible */
+/* Other types of node can be handled by a switch */
+/* Otherwise test for either case */
+/* Print a sequence of chars in printable format, stopping at the end of the
+/* Recursion matches the current regex, nested. If there are any capturing
+/* Reset the maximum number of extractions we might see. */
+/* Reset the value of the ims flags, in case they got changed during
+/* Reset the working variable associated with each extraction. These should
+/* Separate the caselesss case for speed */
+/* Set up for repetition, or handle the non-repeated case */
+/* Set up the first character to match, if available. The first_char value is
+/* Skip over conditional reference data or large extraction number data if
+/* Start of subject assertion */
+/* Start of subject unless notbol, or after internal newline if multiline */
+/* Structure for building a chain of data that actually lives on the
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The condition is an assertion. Call match() to evaluate it - setting
+/* The ims options can vary during the matching as a result of the presence
+/* The repeating kets try the rest of the pattern or restart from the
+/* The repeating kets try the rest of the pattern or restart from the
+/* There's been some horrible disaster. */
+/* This "while" is the end of the "do" above */
+/* This function applies a compiled re to a subject string and picks out
+/* Use a macro for debugging printing, 'cause that limits the use of #ifdef
+/* We don't need to repeat the search if we haven't yet reached the
+/* When a match occurs, substrings will be set for all internal extractions;
+/* Word boundary assertions */
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+1. This software is distributed in the hope that it will be useful,
+2. The origin of this software must not be misrepresented, either by
+3. Altered versions must be plainly marked as such, and must not be
+4. If PCRE is embedded in any software that is released under the GNU
+5.005. If there is an options reset, it will get obeyed in the normal
+5.005. If there is an options reset, it will get obeyed in the normal
+6 : 3 + (ecode[1] << 8) + ecode[2]),
+< -1 => some kind of unexpected problem
+= 0 => success, but offsets is not big enough
+Arguments:
+Arguments:
+Arguments:
+Arguments:
+BOOL anchored;
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL is_subject;
+BOOL minimize = FALSE;
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL rc;
+BOOL startline;
+BOOL using_temporary_offsets = FALSE;
+Copyright (c) 1997-2000 University of Cambridge
+DPRINTF ((">>>> returning %d\n", match_block.errorcode));
+DPRINTF ((">>>> returning %d\n", rc));
+DPRINTF (("Copied offsets from temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Got memory to hold back references\n"));
+DPRINTF (("Unknown opcode %d\n", *ecode));
+DPRINTF (("bracket %d failed\n", number));
+DPRINTF (("bracket 0 failed\n"));
+DPRINTF (("ims reset to %02lx\n", ims));
+DPRINTF (("ims set to %02lx at group repeat\n", ims));
+DPRINTF (("ims set to %02lx\n", ims));
+DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+DPRINTF (("start bracket 0\n"));
+GETCHAR (c, eptr) /* Get character */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+General Purpose Licence (GPL), then the terms of that licence shall
+However, if the referenced string is the empty string, always treat
+If the bracket fails to match, we need to restore this value and also the
+If there isn't enough space in the offset vector, treat this as if it were a
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+Otherwise, we can use the vector supplied, rounding down its size to a multiple
+Permission is granted to anyone to use this software for any purpose on any
+REPEATCHAR:
+REPEATNOTCHAR:
+REPEATTYPE:
+Returns: > 0 => success; value is the number of elements filled in
+Returns: TRUE if matched
+Returns: TRUE if matched
+Returns: nothing
+They are not both allowed to be zero. */
+This is a library of functions to support regular expressions whose syntax
+This is the forcible breaking of infinite loops as implemented in Perl
+This is the forcible breaking of infinite loops as implemented in Perl
+Writing separate code makes it go faster, as does using an autoincrement and
+Written by: Philip Hazel <ph10@cam.ac.uk>
+a move back into the brackets. Check the alternative branches in turn - the
+address of eptr, so that eptr can be a register variable. */
+an assertion "group", stop matching and return TRUE, but record the
+an empty string - recursion will then try other alternatives, if any. */
+an error. Save the top 15 values on the stack, and accept that the rest
+an unanchored pattern, of course. If there's no first char and the pattern was
+analyzing most of the pattern. length > re->max_match_size is
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+and advance one byte in the pattern code. */
+and reinstate them after the recursion. However, we don't know how many
+and semantics are as close as possible to those of the Perl 5 language. See
+and the required character in fact is caseful. */
+at run time, so we have to test for anchoring. The first char may be unset for
+avoid duplicate testing (which takes significant time). This covers the vast
+backing off on a match. */
+bmtable = extra->data.bmtable;
+both cases of the character. Otherwise set the two values the same, which will
+bracketed group and go to there. */
+brackets - for testing for empty matches
+brackets started but not finished, we have to save their starting points
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+c != md->lcc[*eptr++])
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_NOTSTAR;
+c = *ecode++ - OP_STAR;
+c = *ecode++ - OP_TYPESTAR;
+c = *ecode++;
+c = *ecode++;
+c = *eptr++;
+c = 15;
+c = max - min;
+c = md->end_subject - eptr;
+c = md->lcc[c];
+c = md->lcc[c];
+c = md->offset_max;
+c == md->lcc[*eptr++])
+can't just fail here, because of the possibility of quantifiers with zero
+case OP_ALT:
+case OP_ANY:
+case OP_ANY:
+case OP_ANY:
+case OP_ANY:
+case OP_ASSERT:
+case OP_ASSERTBACK:
+case OP_ASSERTBACK_NOT:
+case OP_ASSERT_NOT:
+case OP_BEG_WORD:
+case OP_BRA: /* Non-capturing bracket: optimized */
+case OP_BRAMINZERO:
+case OP_BRANUMBER:
+case OP_BRAZERO:
+case OP_CHARS:
+case OP_CIRC:
+case OP_CLASS:
+case OP_COND:
+case OP_CREF:
+case OP_CRMINPLUS:
+case OP_CRMINPLUS:
+case OP_CRMINQUERY:
+case OP_CRMINQUERY:
+case OP_CRMINRANGE:
+case OP_CRMINRANGE:
+case OP_CRMINSTAR:
+case OP_CRMINSTAR:
+case OP_CRPLUS:
+case OP_CRPLUS:
+case OP_CRQUERY:
+case OP_CRQUERY:
+case OP_CRRANGE:
+case OP_CRRANGE:
+case OP_CRSTAR:
+case OP_CRSTAR:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DOLL:
+case OP_END:
+case OP_END_WORD:
+case OP_EOD:
+case OP_EODN:
+case OP_EXACT:
+case OP_KET:
+case OP_KETRMAX:
+case OP_KETRMIN:
+case OP_MINPLUS:
+case OP_MINQUERY:
+case OP_MINSTAR:
+case OP_MINUPTO:
+case OP_NOT:
+case OP_NOTEXACT:
+case OP_NOTMINPLUS:
+case OP_NOTMINQUERY:
+case OP_NOTMINSTAR:
+case OP_NOTMINUPTO:
+case OP_NOTPLUS:
+case OP_NOTQUERY:
+case OP_NOTSTAR:
+case OP_NOTUPTO:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORD_BOUNDARY:
+case OP_ONCE:
+case OP_OPT:
+case OP_PLUS:
+case OP_QUERY:
+case OP_RECURSE:
+case OP_REF:
+case OP_REVERSE:
+case OP_SOD:
+case OP_STAR:
+case OP_TYPEEXACT:
+case OP_TYPEMINPLUS:
+case OP_TYPEMINQUERY:
+case OP_TYPEMINSTAR:
+case OP_TYPEMINUPTO:
+case OP_TYPEPLUS:
+case OP_TYPEQUERY:
+case OP_TYPESTAR:
+case OP_TYPEUPTO:
+case OP_UPTO:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORD_BOUNDARY:
+case matching may be when this character is hit, so test for it in both its
+caselessly, or if there are any changes of this flag within the regex, set up
+cases if necessary. However, the different cased versions will not be set up
+character" set. If the PCRE_CASELESS is set, implying that the match starts
+characters and work backwards. */
+characters and work backwards. */
+code for maximizing the speed, and do the type test once at the start
+code to character type repeats - written out again for speed. */
+commoning these up that doesn't require a test of the positive/negative
+computer system, and to redistribute it freely, subject to the following
+const char *subject;
+const pcre *re;
+const pcre_extra *extra;
+const uschar *bmtable = NULL;
+const uschar *data = ecode + 1; /* Save for matching */
+const uschar *end_subject;
+const uschar *next = ecode + 1;
+const uschar *next = ecode + 1;
+const uschar *p = md->start_subject + md->offset_vector[offset];
+const uschar *p;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
+const uschar *prev = ecode;
+const uschar *req_char_ptr = start_match - 1;
+const uschar *saved_eptr = eptr;
+const uschar *saved_eptr = eptrb->saved_eptr;
+const uschar *saved_eptr;
+const uschar *start_bits = NULL;
+const uschar *start_match = (const uschar *) subject + start_offset;
+continue; /* With the main loop */
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+course of events. */
+course of events. */
+ctype = *ecode++; /* Code for the character type */
+cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+current high water mark for use by positive assertions. Do this also
+default: /* No repeat follows */
+default: /* No repeat follows */
+default:
+do
+do
+do
+do
+do
+do
+do
+do
+do
+do
+do
+each branch of a lookbehind assertion. If we are too close to the start to
+each substring: the offsets to the start and end of the substring.
+ecode position in code
+ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += 2;
+ecode += 2;
+ecode += 3 + (ecode[4] << 8) + ecode[5];
+ecode += 33; /* Advance past the item */
+ecode += 3; /* Advance past the item */
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 5;
+ecode += 5;
+ecode = next + 3;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else if ((extra->options & PCRE_STUDY_BM) != 0)
+else if (first_char >= 0)
+else if (start_bits != NULL)
+else if (startline)
+encountered */
+end_subject = match_block.end_subject;
+eptr pointer in subject
+eptr points into the subject
+eptr += c;
+eptr += length;
+eptr += length;
+eptr += length;
+eptr += length;
+eptr += min;
+eptr -= (ecode[1] << 8) + ecode[2];
+eptr -= length;
+eptr = md->end_match_ptr;
+eptr = md->end_match_ptr;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptrb pointer to chain of blocks containing eptr at start of
+eptrb = &newptrb;
+eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
+eptrblock *eptrb;
+eptrblock newptrb;
+eptrblock;
+exactly what going to the ket would do. */
+explicit claim or by omission.
+external_extra points to "hints" from pcre_study() or is NULL
+external_re points to the compiled expression
+extraction by setting the offsets and bumping the high water mark. */
+first_char = match_block.lcc[first_char];
+first_char = re->first_char;
+flags can contain
+for (;;)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for the "once" (not-backup up) groups. */
+for the match to succeed. If the first character is set, req_char must be
+found it, so that we don't search again next time round the loop if
+from a previous iteration of this group, and be referred to by a reference
+goto REPEATCHAR;
+goto REPEATCHAR;
+goto REPEATNOTCHAR;
+goto REPEATNOTCHAR;
+goto REPEATTYPE;
+goto REPEATTYPE;
+group number back at the start and if necessary complete handling an
+happens for a repeating ket if no characters were matched in the group.
+happens for a repeating ket if no characters were matched in the group.
+here; that is handled in the code for KET. */
+hold, we get a temporary bit of working store to use during the matching.
+i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
+if (!anchored)
+if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!md->endonly)
+if (!rc)
+if (!startline && extra != NULL)
+if ((*ecode++ == OP_WORD_BOUNDARY) ?
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) == 0)
+if ((extra->options & PCRE_STUDY_MAPPED) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_isgroup) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_DOTALL) == 0 && c == '\n')
+if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) == 0)
+if ((md->ctypes[*eptr++] & ctype_space) != 0)
+if ((md->ctypes[*eptr++] & ctype_space) == 0)
+if ((md->ctypes[*eptr++] & ctype_word) != 0)
+if ((md->ctypes[*eptr++] & ctype_word) == 0)
+if ((md->ctypes[c] & ctype_digit) != 0)
+if ((md->ctypes[c] & ctype_digit) == 0)
+if ((md->ctypes[c] & ctype_space) != 0)
+if ((md->ctypes[c] & ctype_space) == 0)
+if ((md->ctypes[c] & ctype_word) != 0)
+if ((md->ctypes[c] & ctype_word) == 0)
+if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
+if ((re->options & PCRE_FIRSTSET) != 0)
+if ((re->options & PCRE_REQCHSET) != 0)
+if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
+if (*ecode != OP_ONCE && *ecode != OP_ALT)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET)
+if (*ecode == OP_KETRMIN)
+if (*ecode == OP_KETRMIN)
+if (*ecode++ != *eptr++)
+if (*ecode++ == *eptr++)
+if (*eptr != '\n')
+if (*eptr++ == '\n')
+if (*p++ != *eptr++)
+if (*p++ == req_char)
+if (*prev != OP_COND)
+if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
+if (bmtable != NULL)
+if (bmtable[*start_match])
+if (c != *eptr++)
+if (c != md->lcc[*eptr++])
+if (c < 16)
+if (c == *eptr++)
+if (c == md->lcc[*eptr++])
+if (c > md->end_subject - eptr)
+if (cur_is_word == prev_is_word ||
+if (ecode[3] == OP_CREF) /* Condition is extraction test */
+if (ecode[3] == OP_OPT)
+if (eptr != md->start_subject && eptr[-1] != '\n')
+if (eptr != md->start_subject)
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject)
+if (eptr < md->end_subject)
+if (eptr < md->start_subject)
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+if (eptr >= md->end_subject || *eptr == '\n')
+if (eptr >= md->end_subject || c != *eptr)
+if (eptr >= md->end_subject || c != md->lcc[*eptr])
+if (eptr >= md->end_subject || c == *eptr)
+if (eptr >= md->end_subject || c == md->lcc[*eptr])
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr++ >= md->end_subject)
+if (i >= max || !match_ref (offset, eptr, length, md, ims))
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject || c != *eptr++)
+if (i >= max || eptr >= md->end_subject || c == *eptr++)
+if (i >= max || eptr >= md->end_subject)
+if (i >= max || eptr >= md->end_subject)
+if (is_subject && length > md->end_subject - p)
+if (isprint (c = *(p++)))
+if (length == 0)
+if (length > md->end_subject - eptr)
+if (length > md->end_subject - eptr)
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match_block.end_offset_top > offsetcount)
+if (match_block.offset_vector != NULL)
+if (match_block.offset_vector == NULL)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (md->lcc[*ecode++] != md->lcc[*eptr++])
+if (md->lcc[*ecode++] == md->lcc[*eptr++])
+if (md->lcc[*p++] != md->lcc[*eptr++])
+if (md->notbol && eptr == md->start_subject)
+if (md->notempty && eptr == md->start_match)
+if (md->noteol)
+if (md->noteol)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min > 0)
+if (min > md->end_subject - eptr)
+if (min > md->end_subject - eptr)
+if (min > md->end_subject - eptr)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (number > 0)
+if (number > EXTRACT_BASIC_MAX)
+if (number > EXTRACT_BASIC_MAX)
+if (offset < md->offset_max)
+if (offset >= md->offset_max)
+if (offset_top <= offset)
+if (offsetcount < 2)
+if (offsetcount >= 4)
+if (op > OP_BRA)
+if (p > req_char_ptr)
+if (p >= end_subject)
+if (pp == req_char || pp == req_char2)
+if (re == NULL || subject == NULL ||
+if (re->magic_number != MAGIC_NUMBER)
+if (re->max_match_size >= 0
+if (re->top_backref > 0 && re->top_backref >= ocount / 3)
+if (req_char == req_char2)
+if (req_char >= 0)
+if (resetcount > offsetcount)
+if (save != stacksave)
+if (save == NULL)
+if (skipped_chars)
+if (start_match + bmtable[256] > end_subject)
+if (start_match > match_block.start_subject + start_offset)
+if (using_temporary_offsets)
+if (using_temporary_offsets)
+if certain parts of the pattern were not used. */
+if the malloc fails ... there is no way of returning to the top level with
+implied in the second condition, because start_offset > 0. */
+ims current /i, /m, and /s options
+ims the ims flags
+ims = (ims & ~PCRE_IMS) | ecode[4];
+ims = ecode[1];
+ims = original_ims;
+ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
+in the pattern. */
+in the subject string, while eptrb holds the value of eptr at the start of the
+initialize them to avoid reading uninitialized locations. */
+inline, and there are *still* stupid compilers about that don't like indented
+inside the group.
+int
+int *offsets;
+int *save;
+int c;
+int first_char = -1;
+int flags;
+int length;
+int length;
+int length;
+int length;
+int min, max, ctype;
+int number = *prev - OP_BRA;
+int number = op - OP_BRA;
+int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
+int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
+int offset;
+int offset;
+int offset;
+int offset_top;
+int offsetcount;
+int op = (int) *ecode;
+int options;
+int rc;
+int req_char = -1;
+int req_char2 = -1;
+int resetcount, ocount;
+int save_offset1 = md->offset_vector[offset];
+int save_offset2 = md->offset_vector[offset + 1];
+int save_offset3 = md->offset_vector[md->offset_end - number];
+int skipped_chars = 0;
+int stacksave[15];
+int start_offset;
+is a bit large to put on the stack, but using malloc for small numbers
+is_subject TRUE if printing from within md->start_subject
+it as matched, any number of times (otherwise there could be infinite
+item to see if there is repeat information following. The code is similar
+item to see if there is repeat information following. Then obey similar
+last bracketed group - used for breaking infinite loops matching zero-length
+later in the subject; otherwise the test starts at the match point. This
+length length of subject string (may contain binary zeros)
+length length to be matched
+length number to print
+length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
+length = md->end_subject - p;
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+loop. */
+loops). */
+main loop. */
+majority of cases. It will be suboptimal when the case flag changes in a regex
+mark, since extracts may have been taken during the assertion. */
+mark, since extracts may have been taken. */
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode, offset_top, md, ims, eptrb, flags)
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match_block.ctypes = re->tables + ctypes_offset;
+match_block.end_subject = match_block.start_subject + length;
+match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
+match_block.errorcode == PCRE_ERROR_NOMATCH &&
+match_block.lcc = re->tables + lcc_offset;
+match_block.lcc[*start_match] != first_char)
+match_block.notbol = (options & PCRE_NOTBOL) != 0;
+match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
+match_block.noteol = (options & PCRE_NOTEOL) != 0;
+match_block.offset_end = ocount;
+match_block.offset_max = (2 * ocount) / 3;
+match_block.offset_overflow = FALSE;
+match_block.offset_overflow = TRUE;
+match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
+match_block.offset_vector = offsets;
+match_block.start_match = start_match;
+match_block.start_pattern = re->code;
+match_block.start_subject = (const uschar *) subject;
+match_condassert - this is an assertion condition
+match_condassert | match_isgroup))
+match_data *md;
+match_data *md;
+match_data *md;
+match_data match_block;
+match_isgroup - this is the start of a bracketed group
+match_isgroup);
+match_ref (offset, eptr, length, md, ims)
+matches, we carry on as at the end of a normal bracket, leaving the subject
+matching won't pass the KET for an assertion. If any one branch matches,
+matching won't pass the KET for this kind of subpattern. If any one branch
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[3] << 8) + ecode[4];
+max = (ecode[3] << 8) + ecode[4];
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max, eptr));
+max, eptr));
+maximum. Alternatively, if maximizing, find the maximum number of
+maximum. Alternatively, if maximizing, find the maximum number of
+may be wrong. */
+md pointer to "static" info for the match
+md pointer to matching data block, if is_subject is TRUE
+md points to match data block
+md->end_match_ptr = eptr; /* For ONCE */
+md->end_match_ptr = eptr; /* Record where we ended */
+md->end_offset_top = offset_top; /* and how many extracts were taken */
+md->end_offset_top = offset_top;
+md->end_subject - eptr + 1 :
+md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
+md->offset_overflow = TRUE;
+md->offset_vector[md->offset_end - i] = save[i];
+md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
+md->offset_vector[md->offset_end - number] = save_offset3;
+md->offset_vector[md->offset_end - number];
+md->offset_vector[offset + 1] - md->offset_vector[offset];
+md->offset_vector[offset + 1] = eptr - md->start_subject;
+md->offset_vector[offset + 1] = save_offset2;
+md->offset_vector[offset] =
+md->offset_vector[offset] = save_offset1;
+memcpy (offsets + 2, match_block.offset_vector + 2,
+min = (ecode[1] << 8) + ecode[2];
+min = (ecode[1] << 8) + ecode[2];
+min = 0;
+min = 0;
+min = 0;
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = 1;
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+minima. */
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = *ecode == OP_MINUPTO;
+minimize = *ecode == OP_NOTMINUPTO;
+minimize = *ecode == OP_TYPEMINUPTO;
+minimize = TRUE;
+minimum number of matches are present. If min = max, continue at the same
+minimum number of matches are present. If min = max, continue at the same
+misrepresented as being the original software.
+move back, this match function fails. */
+mustn't change the current values of the data slot, because they may be set
+need to recurse. */
+never be used unless previously set, but they get saved and restored, and so we
+never set for an anchored regular expression, but the anchoring may be forced
+newline unless endonly is set, else end of subject unless noteol is set. */
+newptrb.prev = eptrb;
+newptrb.saved_eptr = eptr;
+next += (next[1] << 8) + next[2];
+next += (next[1] << 8) + next[2];
+non-capturing bracket. Don't worry about setting the flag for the error case
+number = (ecode[4] << 8) | ecode[5];
+number = (prev[4] << 8) | prev[5];
+number from a dummy opcode at the start. */
+number, then move along the subject till after the recursive match,
+ocount = offsetcount - (offsetcount % 3);
+ocount = re->top_backref * 3 + 3;
+of (?ims) items in the pattern. They are kept in a local variable so that
+of 3. */
+of subject left; this ensures that every attempt at a match fails. We
+offset index into the offset vector
+offset = number << 1;
+offset = number << 1;
+offset_top current top pointer
+offset_top = md->end_offset_top;
+offset_top = md->end_offset_top;
+offset_top = md->end_offset_top;
+offset_top = offset + 2;
+offset_top, md, ims, eptrb, match_isgroup);
+offsetcount the number of elements in the vector
+offsets points to a vector of ints to be filled in with offsets
+offsets[0] = start_match - match_block.start_subject;
+offsets[1] = match_block.end_match_ptr - match_block.start_subject;
+op = OP_BRA;
+opcode. */
+optimization can save a huge amount of backtracking in patterns with nested
+option for each character match. Maybe that wouldn't add very much to the
+options option bits
+p points to characters
+p--;
+p--;
+past the end if there is only one branch, but that's OK because that is
+pchars (ecode, length, FALSE, md);
+pchars (eptr, 16, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (p, length, FALSE, md);
+pchars (p, length, is_subject, md)
+pchars (start_match, end_subject - start_match, TRUE, &match_block);
+pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
+place we found it at last time. */
+pointer. */
+portions of the string if it matches. Two elements in the vector are set for
+pre-processor statements. I suppose it's only been 10 years... */
+preceded by BRAZERO or BRAMINZERO. */
+preceding bracket, in the appropriate order. */
+preceding bracket, in the appropriate order. We need to reset any options
+printf (" against backref ");
+printf (" against pattern ");
+printf ("%c", c);
+printf (">>>> Match against: ");
+printf (">>>>> Skipped %d chars to reach first character\n",
+printf ("\\x%02x", c);
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("end bracket %d", number);
+printf ("matching subject ");
+printf ("matching subject ");
+printf ("matching subject <null> against pattern ");
+printf ("matching subject <null>");
+printf ("start bracket %d subject=", number);
+rc = 0;
+rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
+rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
+register const uschar *ecode;
+register const uschar *eptr;
+register const uschar *eptr;
+register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
+register int *iend = iptr + resetcount;
+register int *iend = iptr - resetcount / 2 + 1;
+register int *iptr = match_block.offset_vector + ocount;
+register int *iptr = match_block.offset_vector;
+register int c = *start_match;
+register int c;
+register int i;
+register int length = ecode[1];
+register int pp = *p++;
+repeat it in the interests of efficiency. */
+repeat limits are compiled as a number of copies, with the optional ones
+req_char = re->req_char;
+req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
+req_char_ptr = p;
+resetcount = 2 + re->top_bracket * 2;
+resetcount = ocount;
+restoring at the exit of a group is easy. */
+restrictions:
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return PCRE_ERROR_BADMAGIC;
+return PCRE_ERROR_BADOPTION;
+return PCRE_ERROR_NOMATCH;
+return PCRE_ERROR_NOMEMORY;
+return PCRE_ERROR_NULL;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return match (eptr,
+return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
+return match_block.errorcode;
+return rc;
+save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
+save = stacksave;
+save = stacksave;
+save[i] = md->offset_vector[md->offset_end - i];
+seems expensive. As a compromise, the stack is used when there are fewer
+share code. This is very similar to the code for single characters, but we
+similar code to character type repeats - written out again for speed.
+since matching characters is likely to be quite common. First, ensure the
+since matching characters is likely to be quite common. First, ensure the
+skipped_chars += bmtable[*start_match],
+skipped_chars += bmtable[256] - 1;
+skipped_chars -= bmtable[256] - 1;
+skipped_chars);
+skipped_chars++,
+skipped_chars++,
+skipped_chars++,
+skipped_chars++,
+stack of such pointers, to be re-instated at the end of the group when we hit
+stack, for holding the values of the subject pointer at the start of each
+start of each branch to move the current point backwards, so the code at
+start_bits = extra->data.start_bits;
+start_match += bmtable[*start_match];
+start_match += bmtable[256] - 1;
+start_match -= bmtable[256] - 1;
+start_match = (const uschar *) subject + length - re->max_match_size;
+start_match++ < end_subject);
+start_match++;
+start_match++;
+start_match++;
+start_match++;
+start_offset where to start in the subject string
+startline = (re->options & PCRE_STARTLINE) != 0;
+static BOOL
+static BOOL
+static const char rep_max[] =
+static const char rep_min[] =
+static void
+strings.
+struct eptrblock *prev;
+studied, there may be a bitmap of possible first characters. */
+subject points to the subject string
+subject if the requested.
+subpattern - to break infinite loops. */
+subpattern, so as to detect when an empty string has been matched by a
+subsequent match. */
+such there are (offset_top records the completed total) so we just have
+supersede any condition above with which it is incompatible.
+switch (*ecode)
+switch (*ecode)
+switch (ctype)
+switch (ctype)
+switch (ctype)
+switch (op)
+test once at the start (i.e. keep it out of the loop). */
+than 16 values to store; otherwise malloc is used. A problem is what to do
+than the number of characters left in the string, so the match fails.
+that "continue" in the code above comes out to here to repeat the main
+that changed within the bracket before re-running it, so check the next
+that it may occur zero times. It may repeat infinitely, or not at all -
+the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
+the closing ket. When match() is called in other circumstances, we don't add to
+the code for a repeated single character, but I haven't found a nice way of
+the current subject position in the working slot at the top of the vector. We
+the expression and advancing one matching character if failing, up to the
+the expression and advancing one matching character if failing, up to the
+the external pcre header. */
+the file Tech.Notes for some information on the internals.
+the final argument TRUE causes it to stop at the end of an assertion. */
+the group. */
+the length of the reference string explicitly rather than passing the
+the loop runs just once. */
+the minimum number of bytes before we start. */
+the number from a dummy opcode at the start. */
+the point in the subject string is not moved back. Thus there can never be
+the pointer while it matches the class. */
+the same bracket.
+the stack. */
+the start hasn't passed this character yet. */
+the subject. */
+the subject. */
+there were too many extractions, set the return code to zero. In the case
+this level is identical to the lookahead case. */
+this makes a huge difference to execution time when there aren't many brackets
+those back references that we can. In this case there need not be overflow
+time taken, but character matching *is* what this is all about... */
+to save all the potential data. There may be up to 99 such values, which
+to that for character classes, but repeated for efficiency. Then obey
+two branches. If the condition is false, skipping the first branch takes us
+typedef struct eptrblock
+unless PCRE_CASELESS was given or the casing state changes within the regex.
+unlimited repeats that aren't going to match. We don't know what the state of
+unsigned long int ims = 0;
+unsigned long int ims;
+unsigned long int ims;
+unsigned long int original_ims = ims; /* Save for resetting on ')' */
+up quickly if there are fewer than the minimum number of characters left in
+up quickly if there are fewer than the minimum number of characters left in
+using_temporary_offsets = TRUE;
+values of the final offsets, in case they were set by a previous iteration of
+we just need to set up the whole thing as substring 0 before returning. If
+where we had to get some local store to hold offsets for backreferences, copy
+while (!anchored &&
+while (*ecode == OP_ALT)
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*next == OP_ALT);
+while (*next == OP_ALT);
+while (--iptr >= iend)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (iptr < iend)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (p < end_subject)
+while (p < end_subject)
+while (start_match < end_subject &&
+while (start_match < end_subject && *start_match != first_char)
+while (start_match < end_subject && start_match[-1] != '\n')
+while (start_match < end_subject)
+while (start_match < end_subject)
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{0, 0, 0, 0, 1, 1};
+{0, 0, 1, 1, 0, 0};
+} /* End of main loop */
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
diff --git a/testsuite/uniq.sed b/testsuite/uniq.sed
new file mode 100644
index 0000000..7ec66c4
--- /dev/null
+++ b/testsuite/uniq.sed
@@ -0,0 +1,20 @@
+h
+
+:b
+# On the last line, print and exit
+$b
+N
+/^\(.*\)\n\1$/ {
+ # The two lines are identical. Undo the effect of
+ # the N command.
+ g
+ bb
+}
+
+# If the N command had added the last line, print and exit
+$b
+
+# The lines are different; print the first and go
+# back working on the second.
+P
+D
diff --git a/testsuite/version.gin b/testsuite/version.gin
new file mode 100644
index 0000000..2ff9735
--- /dev/null
+++ b/testsuite/version.gin
@@ -0,0 +1,5 @@
+GNU sed version @VERSION@
+Copyright (C) 2003 Free Software Foundation, Inc.
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,
+to the extent permitted by law.
diff --git a/testsuite/writeout.inp b/testsuite/writeout.inp
new file mode 100644
index 0000000..1cfceaf
--- /dev/null
+++ b/testsuite/writeout.inp
@@ -0,0 +1,4 @@
+Facts are simple and facts are straight
+Facts are lazy and facts are late
+Facts all come with points of view
+Facts don't do what I want them to
diff --git a/testsuite/writeout.sed b/testsuite/writeout.sed
new file mode 100644
index 0000000..f925a4d
--- /dev/null
+++ b/testsuite/writeout.sed
@@ -0,0 +1 @@
+/^Facts ar/w writeout.wout
diff --git a/testsuite/wrtout1.good b/testsuite/wrtout1.good
new file mode 100644
index 0000000..1cfceaf
--- /dev/null
+++ b/testsuite/wrtout1.good
@@ -0,0 +1,4 @@
+Facts are simple and facts are straight
+Facts are lazy and facts are late
+Facts all come with points of view
+Facts don't do what I want them to
diff --git a/testsuite/wrtout2.good b/testsuite/wrtout2.good
new file mode 100644
index 0000000..2ef3f50
--- /dev/null
+++ b/testsuite/wrtout2.good
@@ -0,0 +1,2 @@
+Facts are simple and facts are straight
+Facts are lazy and facts are late
diff --git a/testsuite/xabcx.good b/testsuite/xabcx.good
new file mode 100644
index 0000000..3f8bc81
--- /dev/null
+++ b/testsuite/xabcx.good
@@ -0,0 +1,4 @@
+roses are red
+violets are blue
+my feet are cold
+your feet are too
diff --git a/testsuite/xabcx.inp b/testsuite/xabcx.inp
new file mode 100644
index 0000000..f2e2b38
--- /dev/null
+++ b/testsuite/xabcx.inp
@@ -0,0 +1,4 @@
+roses are red
+violets are blue
+my feet are cold
+your feet are blue
diff --git a/testsuite/xabcx.sed b/testsuite/xabcx.sed
new file mode 100644
index 0000000..2a872fb
--- /dev/null
+++ b/testsuite/xabcx.sed
@@ -0,0 +1,2 @@
+# from the ChangeLog (Fri May 21 1993)
+\xfeetxs/blue/too/
diff --git a/testsuite/xbxcx.good b/testsuite/xbxcx.good
new file mode 100644
index 0000000..9eadcd0
--- /dev/null
+++ b/testsuite/xbxcx.good
@@ -0,0 +1,7 @@
+x
+xbx
+xbxcx
+xbxcx
+xbxcx
+xbxcx
+xbxcx \ No newline at end of file
diff --git a/testsuite/xbxcx.inp b/testsuite/xbxcx.inp
new file mode 100644
index 0000000..792d120
--- /dev/null
+++ b/testsuite/xbxcx.inp
@@ -0,0 +1,7 @@
+
+b
+bc
+bac
+baac
+baaac
+baaaac \ No newline at end of file
diff --git a/testsuite/xbxcx.sed b/testsuite/xbxcx.sed
new file mode 100644
index 0000000..e6a9c3d
--- /dev/null
+++ b/testsuite/xbxcx.sed
@@ -0,0 +1,2 @@
+# from the ChangeLog (Wed Sep 5 2001)
+s/a*/x/g
diff --git a/testsuite/xbxcx3.good b/testsuite/xbxcx3.good
new file mode 100644
index 0000000..072a680
--- /dev/null
+++ b/testsuite/xbxcx3.good
@@ -0,0 +1,7 @@
+
+b
+bcx
+bacx
+baacx
+baaacx
+baaaacx
diff --git a/testsuite/xbxcx3.inp b/testsuite/xbxcx3.inp
new file mode 100644
index 0000000..cac4334
--- /dev/null
+++ b/testsuite/xbxcx3.inp
@@ -0,0 +1,7 @@
+
+b
+bc
+bac
+baac
+baaac
+baaaac
diff --git a/testsuite/xbxcx3.sed b/testsuite/xbxcx3.sed
new file mode 100644
index 0000000..759483c
--- /dev/null
+++ b/testsuite/xbxcx3.sed
@@ -0,0 +1 @@
+s/a*/x/3
diff --git a/testsuite/xemacs.good b/testsuite/xemacs.good
new file mode 100644
index 0000000..9fce4f1
--- /dev/null
+++ b/testsuite/xemacs.good
@@ -0,0 +1,67 @@
+#Makefile.in generated automatically by automake 1.5 from Makefile.am.
+
+#Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+#Free Software Foundation, Inc.
+#This Makefile.in is free software; the Free Software Foundation
+#gives unlimited permission to copy and/or distribute it,
+#with or without modifications, as long as this notice is preserved.
+
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+#even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#PARTICULAR PURPOSE.
+
+"@SET_MAKE@"
+
+#Automake requirements
+
+"SHELL = @SHELL@"
+
+"PACKAGE = sed"
+
+"EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \\"
+" m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \\"
+" m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \\"
+" m4/obstack.m4"
+
+"subdir = ."
+"ACLOCAL_M4 = $(top_srcdir)/aclocal.m4"
+"mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs"
+"CONFIG_HEADER = config.h"
+"CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile"
+"DIST_SOURCES ="
+"DATA = $(noinst_DATA)"
+
+"HEADERS = $(noinst_HEADERS)"
+
+
+"RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \\"
+" uninstall-info-recursive all-recursive install-data-recursive \\"
+" install-exec-recursive installdirs-recursive install-recursive \\"
+" uninstall-recursive check-recursive installcheck-recursive"
+"DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \\"
+" COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \\"
+" TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \\"
+" config.sub config_h.in configure configure.ac depcomp \\"
+" install-sh missing mkinstalldirs"
+"DIST_SUBDIRS = $(SUBDIRS)"
+"all: config.h"
+" $(MAKE) $(AM_MAKEFLAGS) all-recursive"
+
+".SUFFIXES:"
+"$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)"
+" cd $(top_srcdir) && \\"
+" $(AUTOMAKE) --gnu Makefile"
+"Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status"
+" cd $(top_builddir) && \\"
+" CONFIG_HEADERS= CONFIG_LINKS= \\"
+" CONFIG_FILES=$@ $(SHELL) ./config.status"
+
+"$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)"
+" $(SHELL) ./config.status --recheck"
+"$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)"
+" cd $(srcdir) && $(AUTOCONF)"
+
+"$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4"
+" cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)"
+"config.h: stamp-h" \ No newline at end of file
diff --git a/testsuite/xemacs.inp b/testsuite/xemacs.inp
new file mode 100644
index 0000000..0fc0414
--- /dev/null
+++ b/testsuite/xemacs.inp
@@ -0,0 +1,67 @@
+# Makefile.in generated automatically by automake 1.5 from Makefile.am.
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Automake requirements
+
+SHELL = @SHELL@
+
+PACKAGE = sed
+
+EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \
+ m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \
+ m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \
+ m4/obstack.m4
+
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile
+DIST_SOURCES =
+DATA = $(noinst_DATA)
+
+HEADERS = $(noinst_HEADERS)
+
+
+RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \
+ uninstall-info-recursive all-recursive install-data-recursive \
+ install-exec-recursive installdirs-recursive install-recursive \
+ uninstall-recursive check-recursive installcheck-recursive
+DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \
+ COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \
+ TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \
+ config.sub config_h.in configure configure.ac depcomp \
+ install-sh missing mkinstalldirs
+DIST_SUBDIRS = $(SUBDIRS)
+all: config.h
+ $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && \
+ CONFIG_HEADERS= CONFIG_LINKS= \
+ CONFIG_FILES=$@ $(SHELL) ./config.status
+
+$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ $(SHELL) ./config.status --recheck
+$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)
+ cd $(srcdir) && $(AUTOCONF)
+
+$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4
+ cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+config.h: stamp-h \ No newline at end of file
diff --git a/testsuite/xemacs.sed b/testsuite/xemacs.sed
new file mode 100644
index 0000000..ee2f744
--- /dev/null
+++ b/testsuite/xemacs.sed
@@ -0,0 +1,16 @@
+# Inspired by xemacs' config.status script; submitted by John Fremlin (john@fremlin.de).
+# Drops "# Generated" and "##" lines and prints other "#" lines (normalized) as-is; every
+# remaining non-empty line gets each \ and " backslash-escaped and is wrapped in "...".
+/^# Generated/d
+s%/\*\*/#.*%%
+s/^ *# */#/
+/^##/d
+/^#/ {
+ p
+ d
+}
+/./ {
+ s/\([\"]\)/\\\1/g
+ s/^/"/
+ s/$/"/
+}