diff options
Diffstat (limited to 'libio/dbz')
-rw-r--r-- | libio/dbz/Makefile.in | 217 | ||||
-rw-r--r-- | libio/dbz/README | 25 | ||||
-rw-r--r-- | libio/dbz/altbytes | 7 | ||||
-rw-r--r-- | libio/dbz/byteflip.c | 38 | ||||
-rw-r--r-- | libio/dbz/case.c | 129 | ||||
-rw-r--r-- | libio/dbz/case.h | 12 | ||||
-rw-r--r-- | libio/dbz/configure.in | 17 | ||||
-rw-r--r-- | libio/dbz/dbz.1 | 221 | ||||
-rw-r--r-- | libio/dbz/dbz.3z | 547 | ||||
-rw-r--r-- | libio/dbz/dbz.c | 1763 | ||||
-rw-r--r-- | libio/dbz/dbz.h | 32 | ||||
-rw-r--r-- | libio/dbz/dbzmain.c | 519 | ||||
-rw-r--r-- | libio/dbz/fake.c | 144 | ||||
-rw-r--r-- | libio/dbz/firstlast25 | 50 | ||||
-rwxr-xr-x | libio/dbz/getmap | 6 | ||||
-rw-r--r-- | libio/dbz/random.c | 31 | ||||
-rw-r--r-- | libio/dbz/revbytes | 7 | ||||
-rw-r--r-- | libio/dbz/stdio.h | 1 |
18 files changed, 3766 insertions, 0 deletions
diff --git a/libio/dbz/Makefile.in b/libio/dbz/Makefile.in new file mode 100644 index 00000000000..3f7833be840 --- /dev/null +++ b/libio/dbz/Makefile.in @@ -0,0 +1,217 @@ +srcdir = . +CFLAGS = -g +C_FLAGS = $(CFLAGS) -I$(srcdir) -I.. -I$(srcdir)/.. -DDBZ_FINISH='_IO_flush_all()' +CC = `if [ -f ../../../gcc/gcc ] ; \ + then echo ../../../gcc/gcc -B../../../gcc/ ; \ + else echo gcc ; fi` +LIBIO = ../libio.a ../../libiberty/libiberty.a +LIBS = $(LIBIO) +# LIBS = ../libcnews.a +DBM = +RFC = -DHAVERFCIZE +CASE = case.o +DEBUG = -DDBZDEBUG +LINTFLAGS = -h $(DEBUG) $(RFC) -I$(srcdir) +LDFLAGS = +# =()<NEWSBIN = @<NEWSBIN>@>()= +NEWSBIN = /usr/lib/newsbin +# workaround for System V make bug +SHELL = /bin/sh + +# database sizes for performance tests, regression, and regression prime-find +TSIZE=12007 +RSIZE=4019 +RPSIZE=2679 + +#### host and target dependent Makefile fragments come in here. +## + +# history files for regression and performance tests +RHIST=hist3.3 +R2HIST=hist10 +THIST=hist13 + +#all: dbz +all: +install: +install-info: +info: + +check: r rclean + +bininstall: dbz + cp dbz $(NEWSBIN) + +cmp: dbz + cmp dbz $(NEWSBIN)/dbz + +newsinstall: + : nothing + +u: dbz.o + ar ruv ../libcnews.a dbz.o + cmp dbz.h ../h/dbz.h + +t: tdbz fake + +lint: + lint $(LINTFLAGS) dbzmain.c dbz.c + +.c.o: + $(CC) $(C_FLAGS) -c $< + +rdbz.o: rdbz.c + $(CC) $(C_FLAGS) $(DEBUG) -DDEFSIZE=$(RSIZE) -c rdbz.c + +rdbzmain.o: rdbzmain.c + $(CC) $(C_FLAGS) $(RFC) -c rdbzmain.c + +tdbz.o: $(srcdir)/dbz.c + cp $(srcdir)/dbz.c tdbz.c + $(CC) $(C_FLAGS) -DDEFSIZE=$(TSIZE) -c tdbz.c + rm tdbz.c + +dbz: dbzmain.o $(CASE) + $(CC) $(LDFLAGS) dbzmain.o $(CASE) $(PRE) $(DBM) $(LIBS) $(POST) -o $@ + +tdbz: dbzmain.o tdbz.o $(CASE) + $(CC) $(LDFLAGS) dbzmain.o tdbz.o $(CASE) $(PRE) $(LIBS) $(POST) -o $@ + +rdbz: rdbzmain.o rdbz.o $(CASE) + $(CC) $(LDFLAGS) rdbzmain.o rdbz.o $(CASE) $(PRE) $(LIBS) $(POST) -o $@ + +fake: fake.o random.o + $(CC) $(LDFLAGS) fake.o random.o $(PRE) $(LIBS) $(POST) -o $@ + 
+byteflip: byteflip.o + $(CC) $(LDFLAGS) byteflip.o $(PRE) $(LIBS) $(POST) -o $@ + +hist10: fake + ./fake -t -e 75 10000 >$@ + +hist3.3: fake + ./fake -t -e 75 3300 >$@ + +hist13: fake + ./fake -t -e 75 13000 >$@ + +r: $(srcdir)/getmap $(srcdir)/revbytes $(srcdir)/altbytes stamp-r8 + : success! + +stamp-r0: + : 'WARNING: creates about 2MB of debris; do "make rclean" afterward' + rm -f dbase dbase[23] dbase.* dbase[23].* + test ! -d xx || rmdir xx + @touch stamp-r0 + +stamp-r1: $(RHIST) $(R2HIST) stamp-r0 + : crude check of synthetic history file + ( sed 25q $(RHIST) ; tail -25 $(RHIST) ) >histjunk + cmp histjunk $(srcdir)/firstlast25 + rm histjunk + @touch stamp-r1 + +r2a: rdbz stamp-r1 + : basic tests, exercising as many options as possible + cp $(RHIST) dbase + mkdir xx + chmod -w xx + ./rdbz -E 1000 -0 -M -i -S -u -U -C xx dbase + rmdir xx + sed '/> 0/d' $(RHIST) >dbase.used + test "`cat dbase.used | wc -l`" -eq "`sed -n '2s/ .*//p' dbase.dir`" ; + +stamp-r2: r2a + cp $(RHIST) dbase2 + ./rdbz -E 1000 -0 -p $(RPSIZE) -t ' ' dbase2 + cmp $(RHIST) dbase + cmp dbase dbase2 + cmp dbase.dir dbase2.dir + cmp dbase.pag dbase2.pag + ./rdbz -E 1000 -0 -c dbase + ./rdbz -E 1000 -0 -c -i -q -M -U dbase + @touch stamp-r2 + +stamp-r3: stamp-r2 + : build a database and then add to it + sed 1000q $(RHIST) >dbase2 + sed 1,1000d $(RHIST) >dbase2.add + ./rdbz -E 1000 -0 dbase2 + ./rdbz -E 1000 -0 -a dbase2 dbase2.add + cmp dbase dbase2 + cmp dbase.dir dbase2.dir + cmp dbase.pag dbase2.pag + @touch stamp-r3 + +stamp-r4: stamp-r3 + : build based on existing one, test extraction and readonly files + ./rdbz -E 1000 -0 -f dbase dbase2 + test "`cat dbase.used | wc -l`" -eq "`awk 'NR==2{print $$1}' dbase2.dir`" ; + test "`cat dbase.used | wc -l`" -eq "`awk 'NR==2{print $$2}' dbase2.dir`" ; + chmod -w dbase2.dir dbase2.pag + ./rdbz -E 1000 -x dbase2 dbase >dbase.temp + cmp dbase.used dbase.temp + @touch stamp-r4 + +stamp-r5: stamp-r4 + : try some small case perversions + sed 's/\(@[^ 
]*\)A/\1a/' dbase >dbase.ick + ./rdbz -E 1000 -x dbase2 dbase.ick >dbase.temp + cmp dbase.used dbase.temp + sed -n 's/A\([^ ]*@\)/a\1/p' dbase >dbase.ick + ./rdbz -x dbase2 dbase.ick >dbase.temp + test ! -s dbase.temp ; + rm -f dbase2.dir dbase2.pag + @touch stamp-r5 + +stamp-r6: stamp-r5 + : try it without tags, case-insensitive, with case perversions + ./rdbz -E 1000 -0 -p '0 b 1' dbase2 + tr '[A-M][n-z]' '[a-m][N-Z]' <dbase2 >dbase.ick + ./rdbz -E 1000 -x dbase2 dbase.ick >dbase.temp + cmp dbase.used dbase.temp + rm -f dbase.temp dbase.ick + @touch stamp-r6 + +stamp-r7: byteflip stamp-r6 + : test various perversions of byte ordering + awk -f $(srcdir)/revbytes dbase.dir >dbase2.dir + ./byteflip `$(srcdir)/getmap dbase.dir` `$(srcdir)/getmap dbase2.dir` <dbase.pag >dbase2.pag + cp dbase dbase2 + ./rdbz -E 1000 -0 -c dbase2 + awk -f $(srcdir)/altbytes dbase.dir >dbase2.dir + dd conv=swab <dbase.pag >dbase2.pag + ./rdbz -E 1000 -0 -c dbase2 + cp dbase2 dbase3 + ./rdbz -E 1000 -0 -f dbase2 dbase3 + ./rdbz -E 1000 -0 -c dbase3 + test " `$(srcdir)/getmap dbase2.dir`" = " `$(srcdir)/getmap dbase3.dir`" ; + @touch stamp-r7 + +stamp-r8: stamp-r7 + : test massive overflow, throw in case sensitivity and tag mask + cp $(R2HIST) dbase + ./rdbz -E 1000 -0 -p '0 0 7ffc0000' dbase + ./rdbz -E 1000 -0 -cq dbase + sed 100q dbase | egrep '[aA].* ' | tr aA Aa >dbase.ick + ./rdbz -x dbase dbase.ick >dbase.temp + test ! -s dbase.temp ; + @touch stamp-r8 + +rclean: + rm -f dbase dbase[23] dbase.* dbase[23].* fake fake.o random.o + rm -f rdbz rdbz.o rdbzmain.o $(RHIST) $(R2HIST) byteflip byteflip.o + rm -f histjunk core stamp-r? *~ + test ! 
-d xx || rmdir xx + +mostlyclean: rclean + rm -f *.o [a-z]dbz [a-z][a-z]dbz junk* PostScript.out + rm -f hist* dbase* *.bak mon.out gmon.out core dbm.h + +clean: mostlyclean + rm -f dbz + +distclean: clean + rm -rf Makefile config.status rdbz.c rdbzmain.c + +maintainer-clean realclean: distclean diff --git a/libio/dbz/README b/libio/dbz/README new file mode 100644 index 00000000000..e7fa8765ad5 --- /dev/null +++ b/libio/dbz/README @@ -0,0 +1,25 @@ +The dbz package was "liberated" from C News. +It is included with the GNU libio because it provides +a fairly good work-out for a stdio implementation. +The Makefile.in, configure.in, and stdio.h have been +set up to test libio. + +------ + +This is the new, improved, lemon-freshened :-) dbz. + +Just "make" will get you dbz.o and the dbz program. "make r" runs an +extensive set of regression tests; most of the mysterious oddments lying +around here are to do with that. "make rclean" cleans up after "make r". + +You probably want to inspect the #ifdef list early in dbz.c before +compiling, although the defaults should work all right on most systems. + +If you are not building this as part of C News, you will need to change +the -I option in FLAGS in the Makefile to "-I.", and delete the DBMLIBS +and RFC lines entirely. That will break some of the regression tests; +at some point I'll fix this. + +If you are using this independently from C News, you probably still want +to look through ../notebook/problems, as some of the portability problems +described in there can affect dbz. 
diff --git a/libio/dbz/altbytes b/libio/dbz/altbytes new file mode 100644 index 00000000000..26cc9fb9e02 --- /dev/null +++ b/libio/dbz/altbytes @@ -0,0 +1,7 @@ +NR == 1 { + printf "%s %s %s %s %s %s %s %s %s", $1, $2, $3, $4, $5, $6, $7, $8, $9 + for (i = 10; i <= NF; i += 2) + printf " %s %s", $(i+1), $i + printf "\n" +} +NR > 1 { print } diff --git a/libio/dbz/byteflip.c b/libio/dbz/byteflip.c new file mode 100644 index 00000000000..d54c6591fa1 --- /dev/null +++ b/libio/dbz/byteflip.c @@ -0,0 +1,38 @@ +#include <stdio.h> + +#define MAXWORD 32 + +int +main(argc, argv) +int argc; +char *argv[]; +{ + register int len; + int inmap[MAXWORD]; + int outmap[MAXWORD]; + char in[MAXWORD]; + char out[MAXWORD]; + register int i; + register int a; + + a = 1; + len = atoi(argv[a++]); + if (len > MAXWORD) + abort(); /* kind of drastic... */ + for (i = 0; i < len; i++) + inmap[i] = atoi(argv[a++]); + if (atoi(argv[a++]) != len) + abort(); + for (i = 0; i < len; i++) + outmap[i] = atoi(argv[a++]); + + while (fread(in, 1, len, stdin) == len) { + for (i = 0; i < len; i++) + out[outmap[i]] = in[inmap[i]]; + fwrite(out, 1, len, stdout); + } +#ifdef DBZ_FINISH + DBZ_FINISH; +#endif + exit(0); +} diff --git a/libio/dbz/case.c b/libio/dbz/case.c new file mode 100644 index 00000000000..87b741ff54a --- /dev/null +++ b/libio/dbz/case.c @@ -0,0 +1,129 @@ +/* + * case-mapping stuff + * + * We exploit the fact that we are dealing only with headers here, and + * headers are limited to the ASCII characters by RFC822. It is barely + * possible that we might be dealing with a translation into another + * character set, but in particular it's very unlikely for a header + * character to be outside -128..255. + * + * Life would be a whole lot simpler if tolower() could safely and portably + * be applied to any char. 
+ */ +#include <stdio.h> +#include "string.h" +#include "case.h" + +/* note that case.h knows the value of OFFSET */ +#define OFFSET 128 /* avoid trouble with negative chars */ +#define MAPSIZE (256+OFFSET) +char casemap[MAPSIZE]; /* relies on init to '\0' */ +static int primed = 0; /* has casemap been set up? */ + +/* + - prime - set up case-mapping stuff + */ +static void +prime() +{ + register char *lp; + register char *up; + register int c; + register int i; + static char lower[] = "abcdefghijklmnopqrstuvwxyz"; + static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + for (lp = lower, up = upper; *lp != '\0'; lp++, up++) { + c = *lp; + casemap[c+OFFSET] = c; + casemap[*up+OFFSET] = c; + } + for (i = 0; i < MAPSIZE; i++) + if (casemap[i] == '\0') + casemap[i] = (char)(i-OFFSET); + primed = 1; +} + +/* + - cistrncmp - case-independent strncmp + */ +int /* < == > 0 */ +cistrncmp(s1, s2, len) +char *s1; +char *s2; +int len; +{ + register char *p1; + register char *p2; + register int n; + + if (!primed) + prime(); + + p1 = s1; + p2 = s2; + n = len; + while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) { + p1++; + p2++; + } + if (n < 0) + return(0); + + /* + * The following case analysis is necessary so that characters + * which look negative collate low against normal characters but + * high against the end-of-string NUL. + */ + if (*p1 == '\0' && *p2 == '\0') + return(0); + else if (*p1 == '\0') + return(-1); + else if (*p2 == '\0') + return(1); + else + return(TOLOW(*p1) - TOLOW(*p2)); +} + +/* + - rfc822ize - do the bizarre case conversion needed for rfc822 message-ids + * + * Actually, this is not quite complete. Absolute, total, full RFC822 + * compliance requires a horrible parsing job, because of the arcane + * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi, + * for example. There are three or four things that might occur in the + * domain part of a message-id that are case-sensitive. 
They don't seem + * to ever occur in real news, thank Cthulhu. (What? You were expecting + * a merciful and forgiving deity to be invoked in connection with RFC822? + * Forget it; none of them would come near it.) + */ +char * /* returns the argument */ +rfc822ize(s) +char *s; +{ + register char *p; + static char post[] = "postmaster"; + static int postlen = sizeof(post)-1; + + if (!primed) + prime(); + + p = strrchr(s, '@'); + if (p == NULL) /* no local/domain split */ + p = ""; /* assume all local */ + else if (p - (s+1) == postlen && CISTREQN(s+1, post, postlen)) { + /* crazy special case -- "postmaster" is case-insensitive */ + p = s; + } +#ifdef NONSTANDARD +#ifdef RFCVIOLATION +#ifdef B_2_11_MISTAKE + p = s; /* all case-insensitive */ +#endif +#endif +#endif + for (; *p != '\0'; p++) + *p = TOLOW(*p); + + return(s); +} diff --git a/libio/dbz/case.h b/libio/dbz/case.h new file mode 100644 index 00000000000..d5ef6961550 --- /dev/null +++ b/libio/dbz/case.h @@ -0,0 +1,12 @@ +extern int cistrncmp(); +extern char *rfc822ize(); + +extern char casemap[]; + +/* must call cistrncmp before invoking TOLOW... */ +#define TOLOW(c) (casemap[(c)+128]) /* see case.c for why 128 */ + +/* ...but the use of it in CISTREQN is safe without the preliminary call (!) */ +/* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */ +#define CISTREQN(a, b, n) \ + (TOLOW((a)[0]) == TOLOW((b)[0]) && cistrncmp(a, b, n) == 0) diff --git a/libio/dbz/configure.in b/libio/dbz/configure.in new file mode 100644 index 00000000000..4cb9b57ce0f --- /dev/null +++ b/libio/dbz/configure.in @@ -0,0 +1,17 @@ +# This file is a shell script fragment that supplies the information +# necessary for a configure script to process the program in +# this directory. For more information, look at ../configure. 
+ +configdirs= +srctrigger=dbzmain.c +srcname="libio dbz test" + +# per-host: + +# per-target: + +files="dbz.c dbzmain.c" +links="rdbz.c rdbzmain.c" + +# post-target: + diff --git a/libio/dbz/dbz.1 b/libio/dbz/dbz.1 new file mode 100644 index 00000000000..d2fff17af98 --- /dev/null +++ b/libio/dbz/dbz.1 @@ -0,0 +1,221 @@ +.TH DBZ 1 "11 Feb 1992" +.BY "C News" +.SH NAME +dbz \- operate on dbz databases of text +.SH SYNOPSIS +.B dbz +[ +.BR \- { axmc } +] [ +.B \-t +c +] [ +.B \-l +length +] [ +.BR \- { qiue } +] [ +.B \-f +old +] [ +.B \-p +parms +] database file ... +.SH DESCRIPTION +.I Dbz +is a shell-level interface to the +.IR dbz (3z) +database routines for indexed access to a text file. +.PP +The +.I database +file must be a text file, +one line per database record, +with the key the first field on the line. +The +.B \-t +option sets the field-separator character; the default is tab. +Setting the separator character to NUL (with +.BR "\-t\ ''" ) +makes the whole line the key. +Lines must not exceed 1023 bytes in length including the newline; +this limit can be increased with the +.B \-l +option. +The limitations and restrictions of +.IR dbz (3z) +must also be observed; +in particular, it remains the user's responsibility to ensure that +no attempt is made to store two entries (whether identical or not) +with the same key. +.PP +In the absence of options, +.I dbz +creates a +.IR dbz (3z) +index for the database; +the index comprises files +.IB database .pag +and +.IB database .dir +in the same directory. +Any previous index is silently overwritten. +The +.BR \-a , +.BR \-x , +.BR \-m , +and +.B \-c +options specify other operations. +.PP +With +.BR \-a , +.I dbz +appends lines from the +.IR file (s) +(standard input if none) +to the database, updating both the +text file and the indexes. +.PP +With +.BR \-x , +.I dbz +reads keys from the +.IR file (s) +(standard input if none) +and prints (on standard output) the corresponding lines, if any, +from the database. 
+The input is in the form of database lines, although only the keys are +significant. +The +.B \-q +option makes +.B \-x +print the input lines whose keys are found instead of the database +lines; this is somewhat faster. +.PP +With +.BR \-m , +operation is the same as for +.B \-x +except that the keys which are \fInot\fR present in the database are printed. +.PP +With +.BR \-c , +.I dbz +checks the database for internal consistency. +The +.B \-q +option causes this check to be done more quickly but less thoroughly +(each key is looked up in the index, but no check is made to be sure +that the index entry points to the right place). +.PP +The +.B \-i +option suppresses the use of +.IR dbz (3z)'s +.I incore +facility. +This makes accesses slower, but keeps the files current +during updating +and reduces +startup/shutdown overhead. +.PP +Normally, +.I dbz +checks whether a key is already in the database before adding it. +The +.B \-u +option suppresses this check, speeding things up at the expense of safety. +.PP +A new index is normally created with default size, +case mapping, and tagging. +The default size is right for 90-100,000 records. +The default case mapping is right for RFC822 message-ids. +See +.IR dbz (3z) +for what tagging is about. +(Note, these defaults can be changed when +.IR dbz (3z) +is installed.) +.PP +If the +.B \-f +option is given, +size, case mapping, and tagging +are instead initialized based on the +database +.IR old . +This is mostly useful when +creating a new generation of an existing database. +(See the description of +.I dbzagain +in +.IR dbz (3z) +for details.) +.PP +If the +.B \-p +option is given, the +.I parms +string specifies the size, case mapping, and tagging. +If +.I parms +is a single decimal number, +that is taken as the expected number of records +in the index, with case mapping and tagging defaulted. 
+Alternatively, +.I parms +can be three fields\(ema decimal number, a case-mapping code character, and a +hexadecimal tag mask\(emseparated by white space. +The decimal number is, again, the expected number of records; +0 means ``use the default''. +See +.IR dbz (3z) +for possible choices of case-mapping code, +but in particular, +.B 0 +means ``no case mapping''. +See +.IR dbz (3z) +for details on tag masks; +0 means ``use the default''. +.PP +If the +.B \-e +option is given, the decimal number in +.B \-p +is taken to be the exact table size, not the expected number of records, +and invocation of +.I dbzsize +(see +.IR dbz (3z)) +to predict a good size for that number of records is suppressed. +.PP +The +.B \&.pag +file is normally about 6 bytes per record (based on the estimate given to +.B \-p +or the previous history of the +.B \-f +database). +The +.B \&.dir +file is tiny. +.SH SEE ALSO +dbz(3z) +.SH HISTORY +Written at U of Toronto by Henry Spencer, for the C News project. +See +.IR dbz (3z) +for the history of the underlying database routines. +.SH BUGS +There are a number of undocumented options with obscure effects, +meant for debugging and regression testing of +.IR dbz (3z). +.PP +Permissions for the index files probably ought to be taken from those +of the base file. +.PP +The line-length limit is a blemish, alleviated only slightly by +.BR \-l . 
diff --git a/libio/dbz/dbz.3z b/libio/dbz/dbz.3z new file mode 100644 index 00000000000..6df25311c70 --- /dev/null +++ b/libio/dbz/dbz.3z @@ -0,0 +1,547 @@ +.TH DBZ 3Z "3 Feb 1991" +.BY "C News" +.SH NAME +dbminit, fetch, store, dbmclose \- somewhat dbm-compatible database routines +.br +dbzfresh, dbzagain, dbzfetch, dbzstore \- database routines +.br +dbzsync, dbzsize, dbzincore, dbzcancel, dbzdebug \- database routines +.SH SYNOPSIS +.nf +.B #include <dbz.h> +.PP +.B dbminit(base) +.B char *base; +.PP +.B datum +.B fetch(key) +.B datum key; +.PP +.B store(key, value) +.B datum key; +.B datum value; +.PP +.B dbmclose() +.PP +.B dbzfresh(base, size, fieldsep, cmap, tagmask) +.B char *base; +.B long size; +.B int fieldsep; +.B int cmap; +.B long tagmask; +.PP +.B dbzagain(base, oldbase) +.B char *base; +.B char *oldbase; +.PP +.B datum +.B dbzfetch(key) +.B datum key; +.PP +.B dbzstore(key, value) +.B datum key; +.B datum value; +.PP +.B dbzsync() +.PP +.B long +.B dbzsize(nentries) +.B long nentries; +.PP +.B dbzincore(newvalue) +.PP +.B dbzcancel() +.PP +.B dbzdebug(newvalue) +.SH DESCRIPTION +These functions provide an indexing system for rapid random access to a +text file (the +.I base +.IR file ). +Subject to certain constraints, they are call-compatible with +.IR dbm (3), +although they also provide some extensions. +(Note that they are +.I not +file-compatible with +.I dbm +or any variant thereof.) +.PP +In principle, +.I dbz +stores key-value pairs, where both key and value are arbitrary sequences +of bytes, specified to the functions by +values of type +.IR datum , +typedefed in the header file to be a structure with members +.I dptr +(a value of type +.I char * +pointing to the bytes) +and +.I dsize +(a value of type +.I int +indicating how long the byte sequence is). +.PP +In practice, +.I dbz +is more restricted than +.IR dbm . 
+A +.I dbz +database +must be an index into a base file, +with the database +.IR value s +being +.IR fseek (3) +offsets into the base file. +Each such +.I value +must ``point to'' a place in the base file where the corresponding +.I key +sequence is found. +A key can be no longer than +.SM DBZMAXKEY +(a constant defined in the header file) bytes. +No key can be an initial subsequence of another, +which in most applications requires that keys be +either bracketed or terminated in some way (see the +discussion of the +.I fieldsep +parameter of +.IR dbzfresh , +below, +for a fine point on terminators). +.PP +.I Dbminit +opens a database, +an index into the base file +.IR base , +consisting of files +.IB base .dir +and +.IB base .pag +which must already exist. +(If the database is new, they should be zero-length files.) +Subsequent accesses go to that database until +.I dbmclose +is called to close the database. +The base file need not exist at the time of the +.IR dbminit , +but it must exist before accesses are attempted. +.PP +.I Fetch +searches the database for the specified +.IR key , +returning the corresponding +.IR value +if any. +.I Store +stores the +.IR key - value +pair in the database. +.I Store +will fail unless the database files are writeable. +See below for a complication arising from case mapping. +.PP +.I Dbzfresh +is a variant of +.I dbminit +for creating a new database with more control over details. +Unlike for +.IR dbminit , +the database files need not exist: +they will be created if necessary, +and truncated in any case. +.PP +.IR Dbzfresh 's +.I size +parameter specifies the size of the first hash table within the database, +in key-value pairs. +Performance will be best if +.I size +is a prime number and +the number of key-value pairs stored in the database does not exceed +about 2/3 of +.IR size . +(The +.I dbzsize +function, given the expected number of key-value pairs, +will suggest a database size that meets these criteria.) 
+Assuming that an +.I fseek +offset is 4 bytes, +the +.B .pag +file will be +.RI 4* size +bytes +(the +.B .dir +file is tiny and roughly constant in size) +until +the number of key-value pairs exceeds about 80% of +.IR size . +(Nothing awful will happen if the database grows beyond 100% of +.IR size , +but accesses will slow down somewhat and the +.B .pag +file will grow somewhat.) +.PP +.IR Dbzfresh 's +.I fieldsep +parameter specifies the field separator in the base file. +If this is not +NUL (0), and the last character of a +.I key +argument is NUL, that NUL compares equal to either a NUL or a +.I fieldsep +in the base file. +This permits use of NUL to terminate key strings without requiring that +NULs appear in the base file. +The +.I fieldsep +of a database created with +.I dbminit +is the horizontal-tab character. +.PP +For use in news systems, various forms of case mapping (e.g. uppercase to +lowercase) in keys are available. +The +.I cmap +parameter to +.I dbzfresh +is a single character specifying which of several mapping algorithms to use. +Available algorithms are: +.RS +.TP +.B 0 +case-sensitive: no case mapping +.TP +.B B +same as +.B 0 +.TP +.B NUL +same as +.B 0 +.TP +.B = +case-insensitive: uppercase and lowercase equivalent +.TP +.B b +same as +.B = +.TP +.B C +RFC822 message-ID rules, case-sensitive before `@' (with certain exceptions) +and case-insensitive after +.TP +.B ? +whatever the local default is, normally +.B C +.RE +.PP +Mapping algorithm +.B 0 +(no mapping) is faster than the others and is overwhelmingly the correct +choice for most applications. +Unless compatibility constraints interfere, it is more efficient to pre-map +the keys, storing mapped keys in the base file, than to have +.I dbz +do the mapping on every search. +.PP +For historical reasons, +.I fetch +and +.I store +expect their +.I key +arguments to be pre-mapped, but expect unmapped keys in the base file. 
+.I Dbzfetch +and +.I dbzstore +do the same jobs but handle all case mapping internally, +so the customer need not worry about it. +.PP +.I Dbz +stores only the database +.IR value s +in its files, relying on reference to the base file to confirm a hit on a key. +References to the base file can be minimized, greatly speeding up searches, +if a little bit of information about the keys can be stored in the +.I dbz +files. +This is ``free'' if there are some unused bits in an +.I fseek +offset, +so that the offset can be +.I tagged +with some information about the key. +The +.I tagmask +parameter of +.I dbzfresh +allows specifying the location of unused bits. +.I Tagmask +should be a mask with +one group of +contiguous +.B 1 +bits. +The bits in the mask should +be unused (0) in +.I most +offsets. +The bit immediately above the mask (the +.I flag +bit) should be unused (0) in +.I all +offsets; +.I (dbz)store +will reject attempts to store a key-value pair in which the +.I value +has the flag bit on. +Apart from this restriction, tagging is invisible to the user. +As a special case, a +.I tagmask +of 1 means ``no tagging'', for use with enormous base files or +on systems with unusual offset representations. +.PP +A +.I size +of 0 +given to +.I dbzfresh +is synonymous with the local default; +the normal default is suitable for tables of 90-100,000 +key-value pairs. +A +.I cmap +of 0 (NUL) is synonymous with the character +.BR 0 , +signifying no case mapping +(note that the character +.B ? +specifies the local default mapping, +normally +.BR C ). +A +.I tagmask +of 0 is synonymous with the local default tag mask, +normally 0x7f000000 (specifying the top bit in a 32-bit offset +as the flag bit, and the next 7 bits as the mask, +which is suitable for base files up to circa 24MB). +Calling +.I dbminit(name) +with the database files empty is equivalent to calling +.IR dbzfresh(name,0,'\et','?',0) . 
+.PP +When databases are regenerated periodically, as in news, +it is simplest to pick the parameters for a new database based on the old one. +This also permits some memory of past sizes of the old database, so that +a new database size can be chosen to cover expected fluctuations. +.I Dbzagain +is a variant of +.I dbminit +for creating a new database as a new generation of an old database. +The database files for +.I oldbase +must exist. +.I Dbzagain +is equivalent to calling +.I dbzfresh +with the same field separator, case mapping, and tag mask as the old database, +and a +.I size +equal to the result of applying +.I dbzsize +to the largest number of entries in the +.I oldbase +database and its previous 10 generations. +.PP +When many accesses are being done by the same program, +.I dbz +is massively faster if its first hash table is in memory. +If an internal flag is 1, +an attempt is made to read the table in when +the database is opened, and +.I dbmclose +writes it out to disk again (if it was read successfully and +has been modified). +.I Dbzincore +sets the flag to +.I newvalue +(which should be 0 or 1) +and returns the previous value; +this does not affect the status of a database that has already been opened. +The default is 0. +The attempt to read the table in may fail due to memory shortage; +in this case +.I dbz +quietly falls back on its default behavior. +.IR Store s +to an in-memory database are not (in general) written out to the file +until +.IR dbmclose +or +.IR dbzsync , +so if robustness in the presence of crashes +or concurrent accesses +is crucial, in-memory databases +should probably be avoided. +.PP +.I Dbzsync +causes all buffers etc. to be flushed out to the files. +It is typically used as a precaution against crashes or concurrent accesses +when a +.IR dbz -using +process will be running for a long time. +It is a somewhat expensive operation, +especially +for an in-memory database. 
+.PP +.I Dbzcancel +cancels any pending writes from buffers. +This is typically useful only for in-core databases, since writes are +otherwise done immediately. +Its main purpose is to let a child process, in the wake of a +.IR fork , +do a +.I dbmclose +without writing its parent's data to disk. +.PP +If +.I dbz +has been compiled with debugging facilities available (which makes it +bigger and a bit slower), +.I dbzdebug +alters the value (and returns the previous value) of an internal flag +which (when 1; default is 0) causes +verbose and cryptic debugging output on standard output. +.PP +Concurrent reading of databases is fairly safe, +but there is no (inter)locking, +so concurrent updating is not. +.PP +The database files include a record of the byte order of the processor +creating the database, and accesses by processors with different byte +order will work, although they will be slightly slower. +Byte order is preserved by +.IR dbzagain . +However, +agreement on the size and internal structure of an +.I fseek +offset is necessary, as is consensus on +the character set. +.PP +An open database occupies three +.I stdio +streams and their corresponding file descriptors; +a fourth is needed for an in-memory database. +Memory consumption is negligible (except for +.I stdio +buffers) except for in-memory databases. +.SH SEE ALSO +dbz(1), dbm(3) +.SH DIAGNOSTICS +Functions returning +.I int +values return 0 for success, \-1 for failure. +Functions returning +.I datum +values return a value with +.I dptr +set to NULL for failure. +.I Dbminit +attempts to have +.I errno +set plausibly on return, but otherwise this is not guaranteed. +An +.I errno +of +.B EDOM +from +.I dbminit +indicates that the database did not appear to be in +.I dbz +format. +.SH HISTORY +The original +.I dbz +was written by +Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us). +Later contributions by David Butler and Mark Moraes. 
+Extensive reworking, +including this documentation, +by Henry Spencer (henry@zoo.toronto.edu) as +part of the C News project. +Hashing function by Peter Honeyman. +.SH BUGS +The +.I dptr +members of returned +.I datum +values point to static storage which is overwritten by later calls. +.PP +Unlike +.IR dbm , +.I dbz +will misbehave if an existing key-value pair is `overwritten' by +a new +.I (dbz)store +with the same key. +The user is responsible for avoiding this by using +.I (dbz)fetch +first to check for duplicates; +an internal optimization remembers the result of the +first search so there is minimal overhead in this. +.PP +Waiting until after +.I dbminit +to bring the base file into existence +will fail if +.IR chdir (2) +has been used meanwhile. +.PP +The RFC822 case mapper implements only a first approximation to the +hideously-complex RFC822 case rules. +.PP +The prime finder in +.I dbzsize +is not particularly quick. +.PP +Should implement the +.I dbm +functions +.IR delete , +.IR firstkey , +and +.IR nextkey . +.PP +On C implementations which trap integer overflow, +.I dbz +will refuse to +.I (dbz)store +an +.I fseek +offset equal to the greatest +representable +positive number, +as this would cause overflow in the biased representation used. +.PP +.I Dbzagain +perhaps ought to notice when many offsets +in the old database were +too big for +tagging, and shrink the tag mask to match. +.PP +Marking +.IR dbz 's +file descriptors +.RI close-on- exec +would be a better approach to the problem +.I dbzcancel +tries to address, but that's harder to do portably. diff --git a/libio/dbz/dbz.c b/libio/dbz/dbz.c new file mode 100644 index 00000000000..c7e8444952b --- /dev/null +++ b/libio/dbz/dbz.c @@ -0,0 +1,1763 @@ +/* + +dbz.c V3.2 + +Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us) +You can use this code in any manner, as long as you leave my name on it +and don't hold me responsible for any problems with it. 
+ +Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988 + +Various improvements + INCORE by moraes@ai.toronto.edu (Mark Moraes) + +Major reworking by Henry Spencer as part of the C News project. + +These routines replace dbm as used by the usenet news software +(it's not a full dbm replacement by any means). It's fast and +simple. It contains no AT&T code. + +In general, dbz's files are 1/20 the size of dbm's. Lookup performance +is somewhat better, while file creation is spectacularly faster, especially +if the incore facility is used. + +*/ + +#include <stdio.h> +#include <sys/types.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#ifndef __STDC__ +extern int errno; +#endif +#include <dbz.h> + +/* + * #ifdef index. "LIA" = "leave it alone unless you know what you're doing". + * + * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h> + * INDEX_SIZE backward compatibility with old dbz; avoid using this + * NMEMORY number of days of memory for use in sizing new table (LIA) + * INCORE backward compatibility with old dbz; use dbzincore() instead + * DBZDEBUG enable debugging + * DEFSIZE default table size (not as critical as in old dbz) + * OLDBNEWS default case mapping as in old B News; set NOBUFFER + * BNEWS default case mapping as in current B News; set NOBUFFER + * DEFCASE default case-map algorithm selector + * NOTAGS fseek offsets are strange, do not do tagging (see below) + * NPAGBUF size of .pag buffer, in longs (LIA) + * SHISTBUF size of ASCII-file buffer, in bytes (LIA) + * MAXRUN length of run which shifts to next table (see below) (LIA) + * OVERFLOW long-int arithmetic overflow must be avoided, will trap + * NOBUFFER do not buffer hash-table i/o, B News locking is defective + */ + +#ifdef FUNNYSEEKS +#include <unistd.h> +#else +#define SEEK_SET 0 +#endif +#ifdef OVERFLOW +#include <limits.h> +#endif + +static int dbzversion = 3; /* for validating .dir file format */ + +/* + * The dbz database exploits the fact that when
news stores a <key,value> + * tuple, the `value' part is a seek offset into a text file, pointing to + * a copy of the `key' part. This avoids the need to store a copy of + * the key in the dbz files. However, the text file *must* exist and be + * consistent with the dbz files, or things will fail. + * + * The basic format of the database is a simple hash table containing the + * values. A value is stored by indexing into the table using a hash value + * computed from the key; collisions are resolved by linear probing (just + * search forward for an empty slot, wrapping around to the beginning of + * the table if necessary). Linear probing is a performance disaster when + * the table starts to get full, so a complication is introduced. The + * database is actually one *or more* tables, stored sequentially in the + * .pag file, and the length of linear-probe sequences is limited. The + * search (for an existing item or an empty slot) always starts in the + * first table, and whenever MAXRUN probes have been done in table N, + * probing continues in table N+1. This behaves reasonably well even in + * cases of massive overflow. There are some other small complications + * added, see comments below. + * + * The table size is fixed for any particular database, but is determined + * dynamically when a database is rebuilt. The strategy is to try to pick + * the size so the first table will be no more than 2/3 full, that being + * slightly before the point where performance starts to degrade. (It is + * desirable to be a bit conservative because the overflow strategy tends + * to produce files with holes in them, which is a nuisance.) + */ + +/* + * The following is for backward compatibility. + */ +#ifdef INDEX_SIZE +#define DEFSIZE INDEX_SIZE +#endif + +/* + * ANSI C says the offset argument to fseek is a long, not an off_t, for some + * reason. Let's use off_t anyway. 
+ */ +#define SOF (sizeof(off_t)) + +/* + * We assume that unused areas of a binary file are zeros, and that the + * bit pattern of `(off_t)0' is all zeros. The alternative is rather + * painful file initialization. Note that okayvalue(), if OVERFLOW is + * defined, knows what value of an offset would cause overflow. + */ +#define VACANT ((off_t)0) +#define BIAS(o) ((o)+1) /* make any valid off_t non-VACANT */ +#define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */ + +/* + * In a Unix implementation, or indeed any in which an off_t is a byte + * count, there are a bunch of high bits free in an off_t. There is a + * use for them. Checking a possible hit by looking it up in the base + * file is relatively expensive, and the cost can be dramatically reduced + * by using some of those high bits to tag the value with a few more bits + * of the key's hash. This detects most false hits without the overhead of + * seek+read+strcmp. We use the top bit to indicate whether the value is + * tagged or not, and don't tag a value which is using the tag bits itself. + * We're in trouble if the off_t representation wants to use the top bit. + * The actual bitmasks and offset come from the configuration stuff, + * which permits fiddling with them as necessary, and also suppressing + * them completely (by defining the masks to 0). We build pre-shifted + * versions of the masks for efficiency. + */ +static off_t tagbits; /* pre-shifted tag mask */ +static off_t taghere; /* pre-shifted tag-enable bit */ +static off_t tagboth; /* tagbits|taghere */ +#define HASTAG(o) ((o)&taghere) +#define TAG(o) ((o)&tagbits) +#define NOTAG(o) ((o)&~tagboth) +#define CANTAG(o) (((o)&tagboth) == 0) +#define MKTAG(v) (((v)<<conf.tagshift)&tagbits) + +/* + * A new, from-scratch database, not built as a rebuild of an old one, + * needs to know table size, casemap algorithm, and tagging. Normally + * the user supplies this info, but there have to be defaults. 
+ */ +#ifndef DEFSIZE +#define DEFSIZE 120011 /* 300007 might be better */ +#endif +#ifdef OLDBNEWS +#define DEFCASE '0' /* B2.10 -- no mapping */ +#define NOBUFFER /* B News locking is defective */ +#endif +#ifdef BNEWS +#define DEFCASE '=' /* B2.11 -- all mapped */ +#define NOBUFFER /* B News locking is defective */ +#endif +#ifndef DEFCASE /* C News compatibility is the default */ +#define DEFCASE 'C' /* C News -- RFC822 mapping */ +#endif +#ifndef NOTAGS +#define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */ +#define TAGMASK 0x7f +#define TAGSHIFT 24 +#else +#define TAGENB 0 /* no tags */ +#define TAGMASK 0 +#define TAGSHIFT 0 +#endif + +/* + * We read configuration info from the .dir file into this structure, + * so we can avoid wired-in assumptions for an existing database. + * + * Among the info is a record of recent peak usages, so that a new table + * size can be chosen intelligently when rebuilding. 10 is a good + * number of usages to keep, since news displays marked fluctuations + * in volume on a 7-day cycle. + */ +struct dbzconfig { + int olddbz; /* .dir file empty but .pag not? */ + off_t tsize; /* table size */ +# ifndef NMEMORY +# define NMEMORY 10 /* # days of use info to remember */ +# endif +# define NUSEDS (1+NMEMORY) + off_t used[NUSEDS]; /* entries used today, yesterday, ... */ + int valuesize; /* size of table values, == SOF */ + int bytemap[SOF]; /* byte-order map */ + char casemap; /* case-mapping algorithm (see cipoint()) */ + char fieldsep; /* field separator in base file, if any */ + off_t tagenb; /* unshifted tag-enable bit */ + off_t tagmask; /* unshifted tag mask */ + int tagshift; /* shift count for tagmask and tagenb */ +}; +static struct dbzconfig conf; +static int getconf(); +static long getno(); +static int putconf(); +static void mybytemap(); +static off_t bytemap(); + +/* + * For a program that makes many, many references to the database, it + * is a large performance win to keep the table in core, if it will fit. 
+ * Note that this does hurt robustness in the event of crashes, and + * dbmclose() *must* be called to flush the in-core database to disk. + * The code is prepared to deal with the possibility that there isn't + * enough memory. There *is* an assumption that a size_t is big enough + * to hold the size (in bytes) of one table, so dbminit() tries to figure + * out whether this is possible first. + * + * The preferred way to ask for an in-core table is to do dbzincore(1) + * before dbminit(). The default is not to do it, although -DINCORE + * overrides this for backward compatibility with old dbz. + * + * We keep only the first table in core. This greatly simplifies the + * code, and bounds memory demand. Furthermore, doing this is a large + * performance win even in the event of massive overflow. + */ +#ifdef INCORE +static int incore = 1; +#else +static int incore = 0; +#endif + +/* + * Stdio buffer for .pag reads. Buffering more than about 16 does not help + * significantly at the densities we try to maintain, and the much larger + * buffers that most stdios default to are much more expensive to fill. + * With small buffers, stdio is performance-competitive with raw read(), + * and it's much more portable. + */ +#ifndef NPAGBUF +#define NPAGBUF 16 +#endif +#ifndef NOBUFFER +#ifdef _IOFBF +static off_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */ +#endif +#endif + +/* + * Stdio buffer for base-file reads. Message-IDs (all news ever needs to + * read) are essentially never longer than 64 bytes, and the typical stdio + * buffer is so much larger that it is much more expensive to fill. + */ +#ifndef SHISTBUF +#define SHISTBUF 64 +#endif +#ifdef _IOFBF +static char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */ +#endif + +/* + * Data structure for recording info about searches. 
+ */ +struct searcher { + off_t place; /* current location in file */ + int tabno; /* which table we're in */ + int run; /* how long we'll stay in this table */ +# ifndef MAXRUN +# define MAXRUN 100 +# endif + long hash; /* the key's hash code (for optimization) */ + off_t tag; /* tag we are looking for */ + int seen; /* have we examined current location? */ + int aborted; /* has i/o error aborted search? */ +}; +static void start(); +#define FRESH ((struct searcher *)NULL) +static off_t search(); +#define NOTFOUND ((off_t)-1) +static int okayvalue(); +static int set(); + +/* + * Arguably the searcher struct for a given routine ought to be local to + * it, but a fetch() is very often immediately followed by a store(), and + * in some circumstances it is a useful performance win to remember where + * the fetch() completed. So we use a global struct and remember whether + * it is current. + */ +static struct searcher srch; +static struct searcher *prevp; /* &srch or FRESH */ + +/* byte-ordering stuff */ +static int mybmap[SOF]; /* my byte order (see mybytemap()) */ +static int bytesame; /* is database order same as mine? */ +#define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap)) +#define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap)) + +/* + * The double parentheses needed to make this work are ugly, but the + * alternative (under most compilers) is to pack around 2K of unused + * strings -- there's just no way to get rid of them. + */ +static int debug; /* controlled by dbzdebug() */ +#ifdef DBZDEBUG +#define DEBUG(args) if (debug) { (void) printf args ; } +#else +#define DEBUG(args) ; +#endif + +/* externals used */ +extern char *malloc(); +extern char *calloc(); +extern void free(); /* ANSI C; some old implementations say int */ +extern int atoi(); +extern long atol(); + +/* misc. 
forwards */ +static long hash(); +static void crcinit(); +static char *cipoint(); +static char *mapcase(); +static int isprime(); +static FILE *latebase(); + +/* file-naming stuff */ +static char dir[] = ".dir"; +static char pag[] = ".pag"; +static char *enstring(); + +/* central data structures */ +static FILE *basef; /* descriptor for base file */ +static char *basefname; /* name for not-yet-opened base file */ +static FILE *dirf; /* descriptor for .dir file */ +static int dirronly; /* dirf open read-only? */ +static FILE *pagf = NULL; /* descriptor for .pag file */ +static off_t pagpos; /* posn in pagf; only search may set != -1 */ +static int pagronly; /* pagf open read-only? */ +static off_t *corepag; /* incore version of .pag file, if any */ +static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */ +static off_t *getcore(); +static int putcore(); +static int written; /* has a store() been done? */ + +/* + - dbzfresh - set up a new database, no historical info + */ +int /* 0 success, -1 failure */ +dbzfresh(name, size, fs, cmap, tagmask) +char *name; /* base name; .dir and .pag must exist */ +long size; /* table size (0 means default) */ +int fs; /* field-separator character in base file */ +int cmap; /* case-map algorithm (0 means default) */ +off_t tagmask; /* 0 default, 1 no tags */ +{ + register char *fn; + struct dbzconfig c; + register off_t m; + register FILE *f; + + if (pagf != NULL) { + DEBUG(("dbzfresh: database already open\n")); + return(-1); + } + if (size != 0 && size < 2) { + DEBUG(("dbzfresh: preposterous size (%ld)\n", size)); + return(-1); + } + + /* get default configuration */ + if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0) + return(-1); /* "can't happen" */ + + /* and mess with it as specified */ + if (size != 0) + c.tsize = size; + c.fieldsep = fs; + switch (cmap) { + case 0: + case '0': + case 'B': /* 2.10 compat */ + c.casemap = '0'; /* '\0' nicer, but '0' printable! 
*/ + break; + case '=': + case 'b': /* 2.11 compat */ + c.casemap = '='; + break; + case 'C': + c.casemap = 'C'; + break; + case '?': + c.casemap = DEFCASE; + break; + default: + DEBUG(("dbzfresh case map `%c' unknown\n", cmap)); + return(-1); + break; + } + switch (tagmask) { + case 0: /* default */ + break; + case 1: /* no tags */ + c.tagshift = 0; + c.tagmask = 0; + c.tagenb = 0; + break; + default: + m = tagmask; + c.tagshift = 0; + while (!(m&01)) { + m >>= 1; + c.tagshift++; + } + c.tagmask = m; + c.tagenb = (m << 1) & ~m; + break; + } + + /* write it out */ + fn = enstring(name, dir); + if (fn == NULL) + return(-1); + f = fopen(fn, "w"); + free(fn); + if (f == NULL) { + DEBUG(("dbzfresh: unable to write config\n")); + return(-1); + } + if (putconf(f, &c) < 0) { + (void) fclose(f); + return(-1); + } + if (fclose(f) == EOF) { + DEBUG(("dbzfresh: fclose failure\n")); + return(-1); + } + + /* create/truncate .pag */ + fn = enstring(name, pag); + if (fn == NULL) + return(-1); + f = fopen(fn, "w"); + free(fn); + if (f == NULL) { + DEBUG(("dbzfresh: unable to create/truncate .pag file\n")); + return(-1); + } else + (void) fclose(f); + + /* and punt to dbminit for the hard work */ + return(dbminit(name)); +} + +/* + - dbzsize - what's a good table size to hold this many entries? + */ +long +dbzsize(contents) +long contents; /* 0 means what's the default */ +{ + register long n; + + if (contents <= 0) { /* foulup or default inquiry */ + DEBUG(("dbzsize: preposterous input (%ld)\n", contents)); + return(DEFSIZE); + } + n = (contents/2)*3; /* try to keep table at most 2/3 full */ + if (!(n&01)) /* make it odd */ + n++; + DEBUG(("dbzsize: tentative size %ld\n", n)); + while (!isprime(n)) /* and look for a prime */ + n += 2; + DEBUG(("dbzsize: final size %ld\n", n)); + + return(n); +} + +/* + - isprime - is a number prime? + * + * This is not a terribly efficient approach. 
+ */ +static int /* predicate */ +isprime(x) +register long x; +{ + static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 }; + register int *ip; + register long div; + register long stop; + + /* hit the first few primes quickly to eliminate easy ones */ + /* this incidentally prevents ridiculously small tables */ + for (ip = quick; (div = *ip) != 0; ip++) + if (x%div == 0) { + DEBUG(("isprime: quick result on %ld\n", (long)x)); + return(0); + } + + /* approximate square root of x */ + for (stop = x; x/stop < stop; stop >>= 1) + continue; + stop <<= 1; + + /* try odd numbers up to stop */ + for (div = *--ip; div < stop; div += 2) + if (x%div == 0) + return(0); + + return(1); +} + +/* + - dbzagain - set up a new database to be a rebuild of an old one + */ +int /* 0 success, -1 failure */ +dbzagain(name, oldname) +char *name; /* base name; .dir and .pag must exist */ +char *oldname; /* base name; all must exist */ +{ + register char *fn; + struct dbzconfig c; + register int i; + register long top; + register FILE *f; + register int newtable; + register off_t newsize; + + if (pagf != NULL) { + DEBUG(("dbzagain: database already open\n")); + return(-1); + } + + /* pick up the old configuration */ + fn = enstring(oldname, dir); + if (fn == NULL) + return(-1); + f = fopen(fn, "r"); + free(fn); + if (f == NULL) { + DEBUG(("dbzagain: cannot open old .dir file\n")); + return(-1); + } + i = getconf(f, (FILE *)NULL, &c); + (void) fclose(f); + if (i < 0) { + DEBUG(("dbzagain: getconf failed\n")); + return(-1); + } + + /* tinker with it */ + top = 0; + newtable = 0; + for (i = 0; i < NUSEDS; i++) { + if (top < c.used[i]) + top = c.used[i]; + if (c.used[i] == 0) + newtable = 1; /* hasn't got full usage history yet */ + } + if (top == 0) { + DEBUG(("dbzagain: old table has no contents!\n")); + newtable = 1; + } + for (i = NUSEDS-1; i > 0; i--) + c.used[i] = c.used[i-1]; + c.used[0] = 0; + newsize = dbzsize(top); + if (!newtable || newsize > c.tsize) /* don't shrink 
new table */ + c.tsize = newsize; + + /* write it out */ + fn = enstring(name, dir); + if (fn == NULL) + return(-1); + f = fopen(fn, "w"); + free(fn); + if (f == NULL) { + DEBUG(("dbzagain: unable to write new .dir\n")); + return(-1); + } + i = putconf(f, &c); + (void) fclose(f); + if (i < 0) { + DEBUG(("dbzagain: putconf failed\n")); + return(-1); + } + + /* create/truncate .pag */ + fn = enstring(name, pag); + if (fn == NULL) + return(-1); + f = fopen(fn, "w"); + free(fn); + if (f == NULL) { + DEBUG(("dbzagain: unable to create/truncate .pag file\n")); + return(-1); + } else + (void) fclose(f); + + /* and let dbminit do the work */ + return(dbminit(name)); +} + +/* + - dbminit - open a database, creating it (using defaults) if necessary + * + * We try to leave errno set plausibly, to the extent that underlying + * functions permit this, since many people consult it if dbminit() fails. + */ +int /* 0 success, -1 failure */ +dbminit(name) +char *name; +{ + register int i; + register size_t s; + register char *dirfname; + register char *pagfname; + + if (pagf != NULL) { + DEBUG(("dbminit: dbminit already called once\n")); + errno = 0; + return(-1); + } + + /* open the .dir file */ + dirfname = enstring(name, dir); + if (dirfname == NULL) + return(-1); + dirf = fopen(dirfname, "r+"); + if (dirf == NULL) { + dirf = fopen(dirfname, "r"); + dirronly = 1; + } else + dirronly = 0; + free(dirfname); + if (dirf == NULL) { + DEBUG(("dbminit: can't open .dir file\n")); + return(-1); + } + + /* open the .pag file */ + pagfname = enstring(name, pag); + if (pagfname == NULL) { + (void) fclose(dirf); + return(-1); + } + pagf = fopen(pagfname, "r+b"); + if (pagf == NULL) { + pagf = fopen(pagfname, "rb"); + if (pagf == NULL) { + DEBUG(("dbminit: .pag open failed\n")); + (void) fclose(dirf); + free(pagfname); + return(-1); + } + pagronly = 1; + } else if (dirronly) + pagronly = 1; + else + pagronly = 0; +#ifdef NOBUFFER + /* + * B News does not do adequate locking on its database 
accesses. + * Why it doesn't get into trouble using dbm is a mystery. In any + * case, doing unbuffered i/o does not cure the problem, but does + * enormously reduce its incidence. + */ + (void) setbuf(pagf, (char *)NULL); +#else +#ifdef _IOFBF + (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf)); +#endif +#endif + pagpos = -1; + /* don't free pagfname, need it below */ + + /* open the base file */ + basef = fopen(name, "r"); + if (basef == NULL) { + DEBUG(("dbminit: basefile open failed\n")); + basefname = enstring(name, ""); + if (basefname == NULL) { + (void) fclose(pagf); + (void) fclose(dirf); + free(pagfname); + pagf = NULL; + return(-1); + } + } else + basefname = NULL; +#ifdef _IOFBF + if (basef != NULL) + (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf)); +#endif + + /* pick up configuration */ + if (getconf(dirf, pagf, &conf) < 0) { + DEBUG(("dbminit: getconf failure\n")); + (void) fclose(basef); + (void) fclose(pagf); + (void) fclose(dirf); + free(pagfname); + pagf = NULL; + errno = EDOM; /* kind of a kludge, but very portable */ + return(-1); + } + tagbits = conf.tagmask << conf.tagshift; + taghere = conf.tagenb << conf.tagshift; + tagboth = tagbits | taghere; + mybytemap(mybmap); + bytesame = 1; + for (i = 0; i < SOF; i++) + if (mybmap[i] != conf.bytemap[i]) + bytesame = 0; + + /* get first table into core, if it looks desirable and feasible */ + s = (size_t)conf.tsize * SOF; + if (incore && (off_t)(s/SOF) == conf.tsize) { + bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b"); + if (bufpagf != NULL) + corepag = getcore(bufpagf); + } else { + bufpagf = NULL; + corepag = NULL; + } + free(pagfname); + + /* misc. 
setup */ + crcinit(); + written = 0; + prevp = FRESH; + DEBUG(("dbminit: succeeded\n")); + return(0); +} + +/* + - enstring - concatenate two strings into a malloced area + */ +static char * /* NULL if malloc fails */ +enstring(s1, s2) +char *s1; +char *s2; +{ + register char *p; + + p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1); + if (p != NULL) { + (void) strcpy(p, s1); + (void) strcat(p, s2); + } else { + DEBUG(("enstring(%s, %s) out of memory\n", s1, s2)); + } + return(p); +} + +/* + - dbmclose - close a database + */ +int +dbmclose() +{ + register int ret = 0; + + if (pagf == NULL) { + DEBUG(("dbmclose: not opened!\n")); + return(-1); + } + + if (fclose(pagf) == EOF) { + DEBUG(("dbmclose: fclose(pagf) failed\n")); + ret = -1; + } + pagf = basef; /* ensure valid pointer; dbzsync checks it */ + if (dbzsync() < 0) + ret = -1; + if (bufpagf != NULL && fclose(bufpagf) == EOF) { + DEBUG(("dbmclose: fclose(bufpagf) failed\n")); + ret = -1; + } + if (corepag != NULL) + free((char *)corepag); + corepag = NULL; + if (fclose(basef) == EOF) { + DEBUG(("dbmclose: fclose(basef) failed\n")); + ret = -1; + } + if (basefname != NULL) + free(basefname); + basef = NULL; + pagf = NULL; + if (fclose(dirf) == EOF) { + DEBUG(("dbmclose: fclose(dirf) failed\n")); + ret = -1; + } + + DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed")); + return(ret); +} + +/* + - dbzsync - push all in-core data out to disk + */ +int +dbzsync() +{ + register int ret = 0; + + if (pagf == NULL) { + DEBUG(("dbzsync: not opened!\n")); + return(-1); + } + if (!written) + return(0); + + if (corepag != NULL) { + if (putcore(corepag, bufpagf) < 0) { + DEBUG(("dbzsync: putcore failed\n")); + ret = -1; + } + } + if (!conf.olddbz) + if (putconf(dirf, &conf) < 0) + ret = -1; + + DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed")); + return(ret); +} + +/* + - dbzcancel - cancel writing of in-core data + * Mostly for use from child processes. 
+ * Note that we don't need to futz around with stdio buffers, because we + * always fflush them immediately anyway and so they never have stale data. + */ +int +dbzcancel() +{ + if (pagf == NULL) { + DEBUG(("dbzcancel: not opened!\n")); + return(-1); + } + + written = 0; + return(0); +} + +/* + - dbzfetch - fetch() with case mapping built in + */ +datum +dbzfetch(key) +datum key; +{ + char buffer[DBZMAXKEY + 1]; + datum mappedkey; + register size_t keysize; + + DEBUG(("dbzfetch: (%s)\n", key.dptr)); + + /* Key is supposed to be less than DBZMAXKEY */ + keysize = key.dsize; + if (keysize >= DBZMAXKEY) { + keysize = DBZMAXKEY; + DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY)); + } + + mappedkey.dptr = mapcase(buffer, key.dptr, keysize); + buffer[keysize] = '\0'; /* just a debug aid */ + mappedkey.dsize = keysize; + + return(fetch(mappedkey)); +} + +/* + - fetch - get an entry from the database + * + * Disgusting fine point, in the name of backward compatibility: if the + * last character of "key" is a NUL, that character is (effectively) not + * part of the comparison against the stored keys. 
+ */ +datum /* dptr NULL, dsize 0 means failure */ +fetch(key) +datum key; +{ + char buffer[DBZMAXKEY + 1]; + static off_t key_ptr; /* return value points here */ + datum output; + register size_t keysize; + register size_t cmplen; + register char *sepp; + + DEBUG(("fetch: (%s)\n", key.dptr)); + output.dptr = NULL; + output.dsize = 0; + prevp = FRESH; + + /* Key is supposed to be less than DBZMAXKEY */ + keysize = key.dsize; + if (keysize >= DBZMAXKEY) { + keysize = DBZMAXKEY; + DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY)); + } + + if (pagf == NULL) { + DEBUG(("fetch: database not open!\n")); + return(output); + } else if (basef == NULL) { /* basef didn't exist yet */ + basef = latebase(); + if (basef == NULL) + return(output); + } + + /* + * By default all keysize bytes are compared and sepp points at the + * field separator itself, so the separator test below is trivially + * true. If the key ends in a NUL, that NUL is left out of the + * comparison and sepp instead points at the byte of the base file + * that lines up with it, which must be a separator or a NUL. + * The keysize > 0 test keeps an empty key from indexing dptr[-1]. + */ + cmplen = keysize; + sepp = &conf.fieldsep; + if (keysize > 0 && key.dptr[keysize-1] == '\0') { + cmplen--; + sepp = &buffer[keysize-1]; + } + start(&srch, &key, FRESH); + while ((key_ptr = search(&srch)) != NOTFOUND) { + DEBUG(("got 0x%lx\n", key_ptr)); + + /* fetch the key */ + if (fseek(basef, key_ptr, SEEK_SET) != 0) { + DEBUG(("fetch: seek failed\n")); + return(output); + } + if (fread(buffer, 1, keysize, basef) != keysize) { + DEBUG(("fetch: read failed\n")); + return(output); + } + + /* try it */ + buffer[keysize] = '\0'; /* terminated for DEBUG */ + (void) mapcase(buffer, buffer, keysize); + DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n", + buffer, key.dptr, keysize)); + if (memcmp(key.dptr, buffer, cmplen) == 0 && + (*sepp == conf.fieldsep || *sepp == '\0')) { + /* we found it */ + output.dptr = (char *)&key_ptr; + output.dsize = SOF; + DEBUG(("fetch: successful\n")); + return(output); + } + } + + /* we didn't find it */ + DEBUG(("fetch: failed\n")); + prevp = &srch; /* remember where we stopped */ + return(output); +} + +/* + - latebase - try to open a base file that wasn't there at the start + */ +static FILE * +latebase() +{ + register FILE *it; + + if (basefname == NULL) { + DEBUG(("latebase: name
foulup\n")); + return(NULL); + } + it = fopen(basefname, "r"); + if (it == NULL) { + DEBUG(("latebase: still can't open base\n")); + } else { + DEBUG(("latebase: late open succeeded\n")); + free(basefname); + basefname = NULL; +#ifdef _IOFBF + (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf)); +#endif + } + return(it); +} + +/* + - dbzstore - store() with case mapping built in + */ +int +dbzstore(key, data) +datum key; +datum data; +{ + char buffer[DBZMAXKEY + 1]; + datum mappedkey; + register size_t keysize; + + DEBUG(("dbzstore: (%s)\n", key.dptr)); + + /* Key is supposed to be less than DBZMAXKEY */ + keysize = key.dsize; + if (keysize >= DBZMAXKEY) { + DEBUG(("dbzstore: key size too big (%d)\n", key.dsize)); + return(-1); + } + + mappedkey.dptr = mapcase(buffer, key.dptr, keysize); + buffer[keysize] = '\0'; /* just a debug aid */ + mappedkey.dsize = keysize; + + return(store(mappedkey, data)); +} + +/* + - store - add an entry to the database + */ +int /* 0 success, -1 failure */ +store(key, data) +datum key; +datum data; +{ + off_t value; + + if (pagf == NULL) { + DEBUG(("store: database not open!\n")); + return(-1); + } else if (basef == NULL) { /* basef didn't exist yet */ + basef = latebase(); + if (basef == NULL) + return(-1); + } + if (pagronly) { + DEBUG(("store: database open read-only\n")); + return(-1); + } + if (data.dsize != SOF) { + DEBUG(("store: value size wrong (%d)\n", data.dsize)); + return(-1); + } + if (key.dsize >= DBZMAXKEY) { + DEBUG(("store: key size too big (%d)\n", key.dsize)); + return(-1); + } + + /* copy the value in to ensure alignment */ + (void) memcpy((char *)&value, data.dptr, SOF); + DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value)); + if (!okayvalue(value)) { + DEBUG(("store: reserved bit or overflow in 0x%lx\n", value)); + return(-1); + } + + /* find the place, exploiting previous search if possible */ + start(&srch, &key, prevp); + while (search(&srch) != NOTFOUND) + continue; + + prevp = FRESH; + conf.used[0]++; + 
DEBUG(("store: used count %ld\n", conf.used[0])); + written = 1; + return(set(&srch, value)); +} + +/* + - dbzincore - control attempts to keep .pag file in core + */ +int /* old setting */ +dbzincore(value) +int value; +{ + register int old = incore; + + incore = value; + return(old); +} + +/* + - getconf - get configuration from .dir file + */ +static int /* 0 success, -1 failure */ +getconf(df, pf, cp) +register FILE *df; /* NULL means just give me the default */ +register FILE *pf; /* NULL means don't care about .pag */ +register struct dbzconfig *cp; +{ + register int c; + register int i; + int err = 0; + + c = (df != NULL) ? getc(df) : EOF; + if (c == EOF) { /* empty file, no configuration known */ + cp->olddbz = 0; + if (df != NULL && pf != NULL && getc(pf) != EOF) + cp->olddbz = 1; + cp->tsize = DEFSIZE; + cp->fieldsep = '\t'; + for (i = 0; i < NUSEDS; i++) + cp->used[i] = 0; + cp->valuesize = SOF; + mybytemap(cp->bytemap); + cp->casemap = DEFCASE; + cp->tagenb = TAGENB; + cp->tagmask = TAGMASK; + cp->tagshift = TAGSHIFT; + DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n", + cp->tsize, cp->casemap, cp->tagenb, + cp->tagmask, cp->tagshift)); + return(0); + } + (void) ungetc(c, df); + + /* first line, the vital stuff */ + if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z') + err = -1; + if (getno(df, &err) != dbzversion) + err = -1; + cp->tsize = getno(df, &err); + cp->fieldsep = getno(df, &err); + while ((c = getc(df)) == ' ') + continue; + cp->casemap = c; + cp->tagenb = getno(df, &err); + cp->tagmask = getno(df, &err); + cp->tagshift = getno(df, &err); + cp->valuesize = getno(df, &err); + if (cp->valuesize != SOF) { + DEBUG(("getconf: wrong off_t size (%d)\n", cp->valuesize)); + err = -1; + cp->valuesize = SOF; /* to protect the loops below */ + } + for (i = 0; i < cp->valuesize; i++) + cp->bytemap[i] = getno(df, &err); + if (getc(df) != '\n') + err = -1; + DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize, + 
cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask, + cp->tagshift)); + DEBUG(("bytemap (%d)", cp->valuesize)); + for (i = 0; i < cp->valuesize; i++) { + DEBUG((" %d", cp->bytemap[i])); + } + DEBUG(("\n")); + + /* second line, the usages */ + for (i = 0; i < NUSEDS; i++) + cp->used[i] = getno(df, &err); + if (getc(df) != '\n') + err = -1; + DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2])); + + if (err < 0) { + DEBUG(("getconf error\n")); + return(-1); + } + return(0); +} + +/* + - getno - get a long + */ +static long +getno(f, ep) +FILE *f; +int *ep; +{ + register char *p; +# define MAXN 50 + char getbuf[MAXN]; + register int c; + + while ((c = getc(f)) == ' ') + continue; + if (c == EOF || c == '\n') { + DEBUG(("getno: missing number\n")); + *ep = -1; + return(0); + } + p = getbuf; + *p++ = c; + while ((c = getc(f)) != EOF && c != '\n' && c != ' ') + if (p < &getbuf[MAXN-1]) + *p++ = c; + if (c == EOF) { + DEBUG(("getno: EOF\n")); + *ep = -1; + } else + (void) ungetc(c, f); + *p = '\0'; + + if (strspn(getbuf, "-1234567890") != strlen(getbuf)) { + DEBUG(("getno: `%s' non-numeric\n", getbuf)); + *ep = -1; + } + return(atol(getbuf)); +} + +/* + - putconf - write configuration to .dir file + */ +static int /* 0 success, -1 failure */ +putconf(f, cp) +register FILE *f; +register struct dbzconfig *cp; +{ + register int i; + register int ret = 0; + + if (fseek(f, 0, SEEK_SET) != 0) { + DEBUG(("fseek failure in putconf\n")); + ret = -1; + } + fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion, cp->tsize, + cp->fieldsep, cp->casemap, cp->tagenb, + cp->tagmask, cp->tagshift, cp->valuesize); + for (i = 0; i < cp->valuesize; i++) + fprintf(f, " %d", cp->bytemap[i]); + fprintf(f, "\n"); + for (i = 0; i < NUSEDS; i++) + fprintf(f, "%ld%c", cp->used[i], (i < NUSEDS-1) ? 
' ' : '\n'); + + (void) fflush(f); + if (ferror(f)) + ret = -1; + + DEBUG(("putconf status %d\n", ret)); + return(ret); +} + +/* + - getcore - try to set up an in-core copy of .pag file + */ +static off_t * /* pointer to copy, or NULL */ +getcore(f) +FILE *f; +{ + register off_t *p; + register size_t i; + register size_t nread; + register char *it; + + it = malloc((size_t)conf.tsize * SOF); + if (it == NULL) { + DEBUG(("getcore: malloc failed\n")); + return(NULL); + } + + nread = fread(it, SOF, (size_t)conf.tsize, f); + if (ferror(f)) { + DEBUG(("getcore: read failed\n")); + free(it); + return(NULL); + } + + p = (off_t *)it + nread; + i = (size_t)conf.tsize - nread; + while (i-- > 0) + *p++ = VACANT; + return((off_t *)it); +} + +/* + - putcore - try to rewrite an in-core table + */ +static int /* 0 okay, -1 fail */ +putcore(tab, f) +off_t *tab; +FILE *f; +{ + if (fseek(f, 0, SEEK_SET) != 0) { + DEBUG(("fseek failure in putcore\n")); + return(-1); + } + (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f); + (void) fflush(f); + return((ferror(f)) ? -1 : 0); +} + +/* + - start - set up to start or restart a search + */ +static void +start(sp, kp, osp) +register struct searcher *sp; +register datum *kp; +register struct searcher *osp; /* may be FRESH, i.e. NULL */ +{ + register long h; + + h = hash(kp->dptr, kp->dsize); + if (osp != FRESH && osp->hash == h) { + if (sp != osp) + *sp = *osp; + DEBUG(("search restarted\n")); + } else { + sp->hash = h; + sp->tag = MKTAG(h / conf.tsize); + DEBUG(("tag 0x%lx\n", sp->tag)); + sp->place = h % conf.tsize; + sp->tabno = 0; + sp->run = (conf.olddbz) ? 
/*
 - search - conduct part of a search
 *
 * Examines successive table slots, starting at sp->place, until it finds
 * a slot worth reporting.  The first call after start() looks at
 * sp->place itself (sp->seen is 0); every later probe advances by one
 * slot, wrapping modulo conf.tsize, and after sp->run probes it moves on
 * to the next table (sp->tabno).  sp->place is updated as the search
 * moves, so a following set() stores into the slot last examined.
 *
 * Returns the stored value with its tag stripped when the tag matches
 * sp->tag, the raw value when the slot carries no tag, and NOTFOUND on
 * a vacant slot, on a short read with no stream error (treated as
 * vacant), or on a seek/read error.  I/O errors also set sp->aborted,
 * which makes every later call on this searcher return NOTFOUND.
 */
static off_t			/* NOTFOUND if we hit VACANT or error */
search(sp)
register struct searcher *sp;
{
	register off_t dest;
	register off_t value;
	off_t val;		/* buffer for value (can't fread register) */
	register off_t place;

	if (sp->aborted)
		return(NOTFOUND);

	for (;;) {
		/* determine location to be examined */
		place = sp->place;
		if (sp->seen) {
			/* go to next location */
			if (--sp->run <= 0) {
				/* run exhausted: continue in the next table */
				sp->tabno++;
				sp->run = MAXRUN;
			}
			/* next slot within a table, offset by whole tables */
			place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
			sp->place = place;
		} else
			sp->seen = 1;	/* now looking at current location */
		DEBUG(("search @ %ld\n", place));

		/* get the tagged value */
		if (corepag != NULL && place < conf.tsize) {
			/* only the first table (tabno 0) can be in core */
			DEBUG(("search: in core\n"));
			value = MAPIN(corepag[place]);
		} else {
			/* seek, if necessary */
			dest = place * SOF;
			if (pagpos != dest) {
				if (fseek(pagf, dest, SEEK_SET) != 0) {
					DEBUG(("search: seek failed\n"));
					pagpos = -1;
					sp->aborted = 1;
					return(NOTFOUND);
				}
				pagpos = dest;
			}

			/* read it */
			if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
				value = MAPIN(val);
			else if (ferror(pagf)) {
				DEBUG(("search: read failed\n"));
				pagpos = -1;
				sp->aborted = 1;
				return(NOTFOUND);
			} else
				value = VACANT;	/* short read without error */

			/* and finish up */
			/* NOTE(review): pagpos is advanced even after a short read -- confirm intended */
			pagpos += sizeof(val);
		}

		/* vacant slot is always cause to return */
		if (value == VACANT) {
			DEBUG(("search: empty slot\n"));
			return(NOTFOUND);
		};

		/* check the tag */
		value = UNBIAS(value);
		DEBUG(("got 0x%lx\n", value));
		if (!HASTAG(value)) {
			DEBUG(("tagless\n"));
			return(value);
		} else if (TAG(value) == sp->tag) {
			DEBUG(("match\n"));
			return(NOTAG(value));
		} else {
			/* somebody else's entry: keep probing */
			DEBUG(("mismatch 0x%lx\n", TAG(value)));
		}
	}
	/* NOTREACHED */
}
/*
 - set - store a value into a location previously found by search
 *
 * Writes value into slot sp->place.  When the value can carry a tag
 * (CANTAG) and the database is not in old-dbz mode, the searcher's tag
 * and the taghere marker are OR-ed in first -- unless the tagged form
 * would be indistinguishable from VACANT once biased (or, with OVERFLOW
 * defined, would equal LONG_MAX), in which case the untagged value is
 * stored.  The result is then passed through BIAS() and MAPOUT().
 *
 * If the slot is covered by the in-core table only memory is updated;
 * otherwise the entry is written to pagf and flushed at once.  Any I/O
 * failure sets sp->aborted and returns -1.
 */
static int			/* 0 success, -1 failure */
set(sp, value)
register struct searcher *sp;
off_t value;
{
	register off_t place = sp->place;
	register off_t v = value;

	if (sp->aborted)
		return(-1);

	if (CANTAG(v) && !conf.olddbz) {
		v |= sp->tag | taghere;
		/* the two tests below both guard the single assignment */
		if (v != UNBIAS(VACANT))	/* BIAS(v) won't look VACANT */
#ifdef OVERFLOW
			if (v != LONG_MAX)	/* and it won't overflow */
#endif
			value = v;
	}
	DEBUG(("tagged value is 0x%lx\n", value));
	value = BIAS(value);
	value = MAPOUT(value);

	/* If we have the index file in memory, use it */
	if (corepag != NULL && place < conf.tsize) {
		corepag[place] = value;
		DEBUG(("set: incore\n"));
		return(0);
	}

	/* seek to spot */
	pagpos = -1;		/* invalidate position memory */
	if (fseek(pagf, place * SOF, SEEK_SET) != 0) {
		DEBUG(("set: seek failed\n"));
		sp->aborted = 1;
		return(-1);
	}

	/* write in data */
	if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
		DEBUG(("set: write failed\n"));
		sp->aborted = 1;
		return(-1);
	}
	/* fflush improves robustness, and buffer re-use is rare anyway */
	if (fflush(pagf) == EOF) {
		DEBUG(("set: fflush failed\n"));
		sp->aborted = 1;
		return(-1);
	}

	DEBUG(("set: succeeded\n"));
	return(0);
}
+ */ +static void +mybytemap(map) +int map[]; /* -> int[SOF] */ +{ + union { + off_t o; + char c[SOF]; + } u; + register int *mp = &map[SOF]; + register int ntodo; + register int i; + + u.o = 1; + for (ntodo = (int)SOF; ntodo > 0; ntodo--) { + for (i = 0; i < SOF; i++) + if (u.c[i] != 0) + break; + if (i == SOF) { + /* trouble -- set it to *something* consistent */ + DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo)); + for (i = 0; i < SOF; i++) + map[i] = i; + return; + } + DEBUG(("mybytemap: byte %d\n", i)); + *--mp = i; + while (u.c[i] != 0) + u.o <<= 1; + } +} + +/* + - bytemap - transform an off_t from byte ordering map1 to map2 + */ +static off_t /* transformed result */ +bytemap(ino, map1, map2) +off_t ino; +int *map1; +int *map2; +{ + union oc { + off_t o; + char c[SOF]; + }; + union oc in; + union oc out; + register int i; + + in.o = ino; + for (i = 0; i < SOF; i++) + out.c[map2[i]] = in.c[map1[i]]; + return(out.o); +} + +/* + * This is a simplified version of the pathalias hashing function. + * Thanks to Steve Belovin and Peter Honeyman + * + * hash a string into a long int. 31 bit crc (from andrew appel). + * the crc table is computed at run time by crcinit() -- we could + * precompute, but it takes 1 clock tick on a 750. + * + * This fast table calculation works only if POLY is a prime polynomial + * in the field of integers modulo 2. Since the coefficients of a + * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is + * implicit. IT MUST ALSO BE THE CASE that the coefficients of orders + * 31 down to 25 are zero. Happily, we have candidates, from + * E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 
16 (1962): + * x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0 + * x^31 + x^3 + x^0 + * + * We reverse the bits to get: + * 111101010000000000000000000000001 but drop the last 1 + * f 5 0 0 0 0 0 0 + * 010010000000000000000000000000001 ditto, for 31-bit crc + * 4 8 0 0 0 0 0 0 + */ + +#define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */ + +static long CrcTable[128]; + +/* + - crcinit - initialize tables for hash function + */ +static void +crcinit() +{ + register int i, j; + register long sum; + + for (i = 0; i < 128; ++i) { + sum = 0L; + for (j = 7 - 1; j >= 0; --j) + if (i & (1 << j)) + sum ^= POLY >> j; + CrcTable[i] = sum; + } + DEBUG(("crcinit: done\n")); +} + +/* + - hash - Honeyman's nice hashing function + */ +static long +hash(name, size) +register char *name; +register int size; +{ + register long sum = 0L; + + while (size--) { + sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f]; + } + DEBUG(("hash: returns (%ld)\n", sum)); + return(sum); +} + +/* + * case-mapping stuff + * + * Borrowed from C News, by permission of the authors. Somewhat modified. + * + * We exploit the fact that we are dealing only with headers here, and + * headers are limited to the ASCII characters by RFC822. It is barely + * possible that we might be dealing with a translation into another + * character set, but in particular it's very unlikely for a header + * character to be outside -128..255. + * + * Life would be a whole lot simpler if tolower() could safely and portably + * be applied to any char. + */ + +#define OFFSET 128 /* avoid trouble with negative chars */ + +/* must call casencmp before invoking TOLOW... */ +#define TOLOW(c) (cmap[(c)+OFFSET]) + +/* ...but the use of it in CISTREQN is safe without the preliminary call (!) 
*/ +/* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */ +#define CISTREQN(a, b, n) \ + (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0) + +#define MAPSIZE (256+OFFSET) +static char cmap[MAPSIZE]; /* relies on init to '\0' */ +static int mprimed = 0; /* has cmap been set up? */ + +/* + - mapprime - set up case-mapping stuff + */ +static void +mapprime() +{ + register char *lp; + register char *up; + register int c; + register int i; + static char lower[] = "abcdefghijklmnopqrstuvwxyz"; + static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + for (lp = lower, up = upper; *lp != '\0'; lp++, up++) { + c = *lp; + cmap[c+OFFSET] = c; + cmap[*up+OFFSET] = c; + } + for (i = 0; i < MAPSIZE; i++) + if (cmap[i] == '\0') + cmap[i] = (char)(i-OFFSET); + mprimed = 1; +} + +/* + - casencmp - case-independent strncmp + */ +static int /* < == > 0 */ +casencmp(s1, s2, len) +char *s1; +char *s2; +int len; +{ + register char *p1; + register char *p2; + register int n; + + if (!mprimed) + mapprime(); + + p1 = s1; + p2 = s2; + n = len; + while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) { + p1++; + p2++; + } + if (n < 0) + return(0); + + /* + * The following case analysis is necessary so that characters + * which look negative collate low against normal characters but + * high against the end-of-string NUL. 
+ */ + if (*p1 == '\0' && *p2 == '\0') + return(0); + else if (*p1 == '\0') + return(-1); + else if (*p2 == '\0') + return(1); + else + return(TOLOW(*p1) - TOLOW(*p2)); +} + +/* + - mapcase - do case-mapped copy + */ +static char * /* returns src or dst */ +mapcase(dst, src, siz) +char *dst; /* destination, used only if mapping needed */ +char *src; /* source; src == dst is legal */ +size_t siz; +{ + register char *s; + register char *d; + register char *c; /* case break */ + register char *e; /* end of source */ + + + c = cipoint(src, siz); + if (c == NULL) + return(src); + + if (!mprimed) + mapprime(); + s = src; + e = s + siz; + d = dst; + + while (s < c) + *d++ = *s++; + while (s < e) + *d++ = TOLOW(*s++); + + return(dst); +} + +/* + - cipoint - where in this message-ID does it become case-insensitive? + * + * The RFC822 code is not quite complete. Absolute, total, full RFC822 + * compliance requires a horrible parsing job, because of the arcane + * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi, + * for example. There are three or four things that might occur in the + * domain part of a message-id that are case-sensitive. They don't seem + * to ever occur in real news, thank Cthulhu. (What? You were expecting + * a merciful and forgiving deity to be invoked in connection with RFC822? + * Forget it; none of them would come near it.) + */ +static char * /* pointer into s, or NULL for "nowhere" */ +cipoint(s, siz) +char *s; +size_t siz; +{ + register char *p; + static char post[] = "postmaster"; + static int plen = sizeof(post)-1; + + switch (conf.casemap) { + case '0': /* unmapped, sensible */ + return(NULL); + break; + case 'C': /* C News, RFC 822 conformant (approx.) 
/*
 - dbzdebug - control dbz debugging at run time
 *
 * With DBZDEBUG compiled in, installs value as the debug flag and hands
 * back the previous setting.  Without it, the request is refused by
 * returning -1.
 */
int				/* old value */
dbzdebug(value)
int value;
{
#ifndef DBZDEBUG
	return(-1);
#else
	register int previous;

	previous = debug;
	debug = value;
	return(previous);
#endif
}
+ */ +#define DBZMAXKEY 255 diff --git a/libio/dbz/dbzmain.c b/libio/dbz/dbzmain.c new file mode 100644 index 00000000000..4317a0d0831 --- /dev/null +++ b/libio/dbz/dbzmain.c @@ -0,0 +1,519 @@ +/* + * dbz - use and test dbz in various ways + * + * -Log- + */ + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> +#include <dbz.h> + +#ifdef FUNNYSEEKS +#include <unistd.h> +#else +#define SEEK_SET 0 +#endif + +#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) + +#ifndef lint +static char RCSid[] = "$Header: /egcs/carton/cvsfiles/egcs/./libio/dbz/dbzmain.c,v 1.1 1997/08/21 22:58:23 jason Exp $"; +#endif + +char *progname; + +char *inname = "(no file)"; /* filename for messages etc. */ +long lineno; /* line number for messages etc. */ + +char *my_basename; +char *pagname; +char *dir_name; +char *str2dup(); +FILE *base; + +int op = 'b'; /* what to do, default build a new table */ +int baseinput = 1; /* is the base file also the input? */ + +char *from = NULL; /* old table to use for dbzagain() */ +int omitzero = 0; /* omit lines tagged with 0 */ +long every = 0; /* report every n lines */ +int syncs = 0; /* dbzsync() on each report */ +int quick = 0; /* quick checking, not too thorough */ +int sweep = 0; /* sweep file checking all offsets */ +int useincore = 1; /* should we use incore facility? */ +long xxx = 0; /* debugging variable */ +int printx = 0; /* print xxx after all is done */ +int unique = 1; /* before store(), check with fetch() */ +int usefresh = 0; /* use dbzfresh? */ +long siz = 0; /* -p size */ +char map = 'C'; /* -p map */ +long tag = 0; /* -p tag mask */ +int exact = 0; /* do not run dbzsize(siz) */ +int dbzint = 1; /* use new interface? */ +char fs = '\t'; /* field separator, default tab */ +int unopen = 0; /* make base unopenable during dbminit? 
*/ +char *change = NULL; /* chdir here before dbmclose */ + +#define DEFBUF 1024 /* default line-buffer size */ +int buflen = DEFBUF; /* line length limit */ +char lbuf[DEFBUF]; +char *line = lbuf; +char cbuf[DEFBUF]; +char *cmp = cbuf; + +void fail(); +void dofile(); +void runs(); +void dosweep(); +void mkfiles(); +void crfile(); +void doline(); +void process(); + +#ifdef HAVERFCIZE +extern char *rfc822ize(); +#else +#define rfc822ize(n) (n) +#endif + +extern char *malloc(); + +/* + - main - parse arguments and handle options + */ +int +main(argc, argv) +int argc; +char *argv[]; +{ + int c; + int errflg = 0; + extern int optind; + extern char *optarg; + int doruns = 0; + extern long atol(); + + progname = argv[0]; + + while ((c = getopt(argc, argv, "axcmt:l:R0E:SqOiX:Yuf:p:eMUC:d")) != EOF) + switch (c) { + case 'a': /* append to existing table */ + if (op != 'b') + fail("only one of -a -x -c -m can be given", ""); + op = 'a'; + baseinput = 0; + break; + case 'x': /* extract from existing table */ + if (op != 'b') + fail("only one of -a -x -c -m can be given", ""); + op = 'x'; + baseinput = 0; + break; + case 'c': /* check existing table */ + if (op != 'b') + fail("only one of -a -x -c -m can be given", ""); + op = 'c'; + break; + case 'm': /* extract missing (complement of -x) */ + if (op != 'b') + fail("only one of -a -x -c -m can be given", ""); + op = 'm'; + baseinput = 0; + break; + case 't': /* set field separator */ + if (strlen(optarg) > 1) + fail("only one field separator allowed", ""); + fs = *optarg; + break; + case 'l': /* override line-length limit */ + buflen = atoi(optarg) + 1; + if (buflen <= 2) + fail("bad -l value `%s'", optarg); + line = malloc(buflen); + cmp = malloc(buflen); + if (line == NULL || cmp == NULL) + fail("cannot allocate %s-byte buffers", optarg); + break; + case 'R': /* print run statistics */ + doruns = 1; + break; + case '0': /* omit lines tagged (by fake -t) with 0 */ + omitzero = 1; + break; + case 'E': /* report every n items 
*/ + every = atol(optarg); + break; + case 'S': /* dbzsync() on each -E report */ + syncs = 1; + break; + case 'q': /* quick check or extract */ + quick = 1; + break; + case 'O': /* sweep file checking all offsets */ + sweep = 1; + break; + case 'i': /* don't use incore */ + useincore = 0; + break; + case 'X': /* set xxx */ + xxx = atoi(optarg); + break; + case 'Y': /* print xxx afterward */ + printx = 1; + break; + case 'u': /* don't check uniqueness */ + unique = 0; + break; + case 'f': /* init from existing table's parameters */ + from = optarg; + break; + case 'p': /* parameters for dbzfresh */ + if (sscanf(optarg, "%ld %1s %lx", &siz, &map, &tag) != 3) { + map = '?'; + tag = 0; + if (sscanf(optarg, "%ld", &siz) != 1) + fail("bad -n value `%s'", optarg); + } + usefresh = 1; + break; + case 'e': /* -p size is exact, don't dbzsize() it */ + exact = 1; + break; + case 'M': /* use old dbm interface + rfc822ize */ + dbzint = 0; + break; + case 'U': /* make base unopenable during init */ + unopen = 1; + break; + case 'C': /* change directories before dbmclose */ + change = optarg; + break; + case 'd': /* Debugging. 
*/ + if (dbzdebug(1) < 0) + fail("dbz debugging not available", ""); + break; + case '?': + default: + errflg++; + break; + } + if (errflg || optind >= argc || (optind+1 < argc && baseinput)) { + fprintf(stderr, "usage: %s ", progname); + fprintf(stderr, "[-a] [-x] [-c] database [file] ...\n"); + exit(2); + } + + (void) dbzincore(useincore); + my_basename = argv[optind]; + pagname = str2dup(my_basename, ".pag"); + dir_name = str2dup(my_basename, ".dir"); + mkfiles(); + optind++; + + if (baseinput) /* implies no further arguments */ + process(base, my_basename); + else if (optind >= argc) + process(stdin, "stdin"); + else + for (; optind < argc; optind++) + dofile(argv[optind]); + + if (change != NULL) + (void) chdir(change); + if (dbmclose() < 0) + fail("dbmclose failed", ""); + if (doruns) + runs(pagname); + if (sweep) + dosweep(my_basename, pagname); + if (printx) + printf("%ld\n", xxx); +#ifdef DBZ_FINISH + DBZ_FINISH; +#endif + exit(0); +} + +/* + - dofile - open a file and invoke process() + */ +void +dofile(name) +char *name; +{ + register FILE *in; + + if (STREQ(name, "-")) + process(stdin, "-"); + else { + in = fopen(name, "r"); + if (in == NULL) + fail("cannot open `%s'", name); + process(in, name); + (void) fclose(in); + } +} + +/* + - mkfiles - create empty files and open them up + */ +void +mkfiles() +{ + if (op == 'b' && !dbzint) { + crfile(dir_name); + crfile(pagname); + } + + base = fopen(my_basename, (op == 'a') ? "a" : "r"); + if (base == NULL) + fail("cannot open `%s'", my_basename); + if (unopen) + (void) chmod(my_basename, 0); + if (from != NULL) { + if (dbzagain(my_basename, from) < 0) + fail("dbzagain(`%s'...) failed", my_basename); + } else if (op == 'b' && dbzint) { + if (!exact) + siz = dbzsize(siz); + if (dbzfresh(my_basename, siz, (int)fs, map, tag) < 0) + fail("dbzfresh(`%s'...) 
failed", my_basename); + } else if (dbminit(my_basename) < 0) + fail("dbminit(`%s') failed", my_basename); + if (unopen) + (void) chmod(my_basename, 0600); /* hard to restore original */ +} + +/* + - crfile - create a file + */ +void +crfile(name) +char *name; +{ + register int f; + + f = creat(name, 0666); + if (f < 0) + fail("cannot create `%s'", name); + (void) close(f); +} + +/* + - process - process input file + */ +void +process(in, name) +FILE *in; +char *name; +{ + register off_t place; + + inname = name; + lineno = 0; + + for (;;) { + place = ftell(in); + if (fgets(line, buflen, in) == NULL) + return; + lineno++; + if (every > 0 && lineno%every == 0) { + fprintf(stderr, "%ld\n", lineno); + if (dbzsync() < 0) + fail("dbzsync failed", ""); + } + doline(line, place); + } + /* NOTREACHED */ +} + +/* + - doline - process input line + */ +void +doline(lp, inoffset) +char *lp; +off_t inoffset; +{ + register char *p; + register char pc; + datum key, value; + off_t place = inoffset; + register int shouldfind; + register int llen; + char keytext[DBZMAXKEY+1]; + + p = NULL; + if (fs != '\0') + p = strchr(lp, fs); + if (p == NULL) + p = lp + strlen(lp); + if (p > lp && *(p-1) == '\n') + p--; + if (p - lp > DBZMAXKEY) + fail("key of `%.40s...' too long", lp); + pc = *p; + *p = '\0'; + (void) strcpy(keytext, lp); + *p = pc; + key.dptr = (dbzint) ? keytext : rfc822ize(keytext); + key.dsize = strlen(keytext)+1; + + switch (op) { + case 'a': + place = ftell(base); + llen = strlen(lp); + if (fwrite(lp, 1, llen, base) != llen) + fail("write error in `%s'", my_basename); + /* FALLTHROUGH */ + case 'b': + if (omitzero && p != NULL && *(p+1) == '0') + return; + if (unique) { + value = (dbzint) ? dbzfetch(key) : fetch(key); + if (value.dptr != NULL) + fail("`%.40s...' already present", lp); + } + value.dptr = (char *)&place; + value.dsize = (int)sizeof(off_t); + if (((dbzint) ? 
dbzstore(key, value) : store(key, value)) < 0) + fail("store failed on `%.40s...'", lp); + break; + case 'c': + value = (dbzint) ? dbzfetch(key) : fetch(key); + shouldfind = (omitzero && p != NULL && *(p+1) == '0') ? 0 : 1; + if (!shouldfind && (value.dptr != NULL || value.dsize != 0)) + fail("`%.40s...' found, shouldn't be", lp); + if (shouldfind && (value.dptr == NULL || + value.dsize != sizeof(off_t))) + fail("can't find `%.40s...'", lp); + if (shouldfind && !quick) { + (void) memcpy((char *)&place, value.dptr, sizeof(off_t)); + if (place != inoffset) + fail("offset mismatch on `%.40s...'", lp); + if (fseek(base, place, SEEK_SET) == -1) + fail("fseek failed on `%.40s...'", lp); + if (fgets(cmp, buflen, base) == NULL) + fail("can't read line for `%.40s...'", lp); + if (!STREQ(lp, cmp)) + fail("compare failed on `%.40s...'", lp); + } + break; + case 'x': + value = (dbzint) ? dbzfetch(key) : fetch(key); + if (value.dptr != NULL && !quick) { + (void) memcpy((char *)&place, value.dptr, sizeof(off_t)); + if (fseek(base, place, SEEK_SET) == -1) + fail("fseek failed on `%.40s...'", lp); + if (fgets(cmp, buflen, base) == NULL) + fail("can't read line for `%.40s...'", lp); + fputs(cmp, stdout); + } else if (value.dptr != NULL) + fputs(lp, stdout); + break; + case 'm': + value = (dbzint) ? 
dbzfetch(key) : fetch(key); + if (value.dptr == NULL) { + fputs(keytext, stdout); + putchar('\n'); + } + break; + default: + fail("unknown operator -- can't happen", ""); + break; + } +} + +/* + - runs - print run statistics + */ +void +runs(file) +char *file; +{ + register FILE *fd; + off_t it; + register long run; + + fd = fopen(file, "r"); + if (fd == NULL) + fail("cannot reopen `%s'", file); + run = 0; + while (fread((char *)&it, sizeof(off_t), 1, fd) == 1) { + if (it != 0) + run++; + else if (run > 0) { + printf("%ld\n", run); + run = 0; + } + } + (void) fclose(fd); +} + +/* + - dosweep - sweep pag file checking for valid offsets + */ +void +dosweep(fn, pn) +char *fn; +char *pn; +{ + register FILE *pf; + off_t it; + char nl; + register FILE *hf; + + hf = fopen(fn, "r"); + if (hf == NULL) + fail("cannot reopen `%s'", fn); + pf = fopen(pn, "r"); + if (pf == NULL) + fail("cannot reopen `%s'", pn); + while (fread((char *)&it, sizeof(off_t), 1, pf) == 1) { + it = (it & ((off_t)0x80000000)) ? 
(it&~((off_t)0xff000000)) : it; + if (it != 0 && it != 1) { /* 0 empty, 1 known okay */ + it--; /* get rid of bias */ + (void) fseek(hf, it-1, SEEK_SET); + nl = getc(hf); + if (nl != '\n') + fprintf(stderr, "offset 0%lo does not point to line\n", + (long)it); + } + } + (void) fclose(hf); + (void) fclose(pf); +} + +/* + - fail - complain and die + */ +void +fail(s1, s2) +char *s1; +char *s2; +{ + fprintf(stderr, "%s: (file `%s', line %ld) ", progname, inname, lineno); + fprintf(stderr, s1, s2); + fprintf(stderr, "\n"); + exit(1); +} + +/* + - str2dup - concatenate strings and malloc result + */ +char * +str2dup(s1, s2) +char *s1; +char *s2; +{ + register char *p; + + p = malloc((size_t)strlen(s1) + strlen(s2) + 1); + if (p == NULL) + fail("can't allocate space for strings", ""); + (void) strcpy(p, s1); + (void) strcat(p, s2); + return(p); +} diff --git a/libio/dbz/fake.c b/libio/dbz/fake.c new file mode 100644 index 00000000000..ed2a2b75937 --- /dev/null +++ b/libio/dbz/fake.c @@ -0,0 +1,144 @@ +/* + * fake - make up random lines resembling history-file entries, reproducibly + * + * -Log- + */ + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + +#define MAXSTR 500 /* For sizing strings -- DON'T use BUFSIZ! */ +#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) + +#ifndef lint +static char RCSid[] = "$Header: /rel/cvsfiles/devo/libio/dbz/fake.c,v 1.2 1993/10/25 20:02:42 bothner Exp $"; +#endif + +int midonly = 0; /* just message ids, rest not realistic */ +int tag = 0; /* tag lines with random digit for later use */ +int expired = -1; /* percentage of lines to be expired */ + +int debug = 0; +char *progname; + +char *inname; /* filename for messages etc. */ +long lineno; /* line number for messages etc. 
/*
 - addchars - generate n random characters suitable for history file
 *
 * Appends len characters, each drawn from vocab via range(), to the
 * NUL-terminated string in buf and re-terminates it.  The caller must
 * provide room for len more characters.
 */
void
addchars(buf, len)
char *buf;
int len;
{
	extern int range();
	register int i;
	register char *p = buf + strlen(buf);
	static char vocab[] = "1234567890.abcde.fghij.klmno.pqrst.uvwxyz.\
1234567890.ABCDE.FGHIJ.KLMNO.PQRST.UVWXYZ.1234567890.\
1234567890.abcde.fghij.klmno.pqrst.uvwxyz.1234567890";

	/*
	 * range() is an old-style function taking ints and there is no
	 * prototype to convert for us, so the size_t from sizeof must be
	 * cast explicitly.  -2: one for the NUL, one because the upper
	 * bound is inclusive.
	 */
	for (i = len; i > 0; i--)
		*p++ = vocab[range(0, (int)(sizeof(vocab)-2))];
	*p++ = '\0';
}
/*
 * random-number generator for testing
 */
static unsigned long next = 1;

/*
 - range - generate a random number within an inclusive range
 *
 * Algorithm from ANSI C standard.  Limitation:  max-min <= 32767.
 *
 * The generator is always stepped, so the sequence stays reproducible
 * whatever the arguments.  If the range is empty or its width is not
 * positive (max < min, or max-min+1 overflows), min is returned rather
 * than taking a remainder by zero or by a negative number.
 */
int
range(min, max)
int min;
int max;
{
	register int temp;
	register int span = max - min + 1;

	next = next * 1103515245 + 12345;
	temp = (int)((next/65536)%32768);
	if (span <= 0)
		return(min);
	return(temp%span + min);
}

/*
 - seed - seed random number generator
 */
void
seed(n)
long n;
{
	next = (unsigned long)n;
}