From 4d396858eb897f2bb6d318b851a369ee92e50147 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Thu, 27 Mar 2014 22:49:58 +0200 Subject: Add input parser to readfile extension, document it and test it. --- NEWS | 5 +- doc/gawk.info | 157 ++++++++++++++++++++++++++----------------------- doc/gawk.texi | 8 ++- doc/gawktexi.in | 8 ++- extension/ChangeLog | 6 ++ extension/readfile.3am | 20 ++++++- extension/readfile.c | 148 +++++++++++++++++++++++++++++++++++++++++----- test/ChangeLog | 5 ++ test/Makefile.am | 8 ++- test/Makefile.in | 8 ++- test/readfile2.awk | 12 ++++ test/readfile2.ok | 21 +++++++ 12 files changed, 308 insertions(+), 98 deletions(-) create mode 100644 test/readfile2.awk create mode 100644 test/readfile2.ok diff --git a/NEWS b/NEWS index c6a4f216..423a46f0 100644 --- a/NEWS +++ b/NEWS @@ -45,7 +45,10 @@ Changes from 4.1.0 to 4.1.1 If the thousands separator is a string, it will be correctly added to decimal numbers. -14. A number of bugs have been fixed. See the ChangeLog. +14. The readfile extension now has an input parser that will read whole + files as a single record. + +15. A number of bugs have been fixed. See the ChangeLog. Changes from 4.0.2 to 4.1.0 --------------------------- diff --git a/doc/gawk.info b/doc/gawk.info index ea2a718f..6bff2719 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -24830,7 +24830,8 @@ File: gawk.info, Node: Extension Sample Readfile, Next: Extension Sample API T 16.7.10 Reading An Entire File ------------------------------ -The `readfile' extension adds a single function named `readfile()': +The `readfile' extension adds a single function named `readfile()', and +an input parser: `@load "readfile"' This is how you load the extension. @@ -24840,6 +24841,12 @@ The `readfile' extension adds a single function named `readfile()': a string containing the entire contents of the requested file. Upon error, the function returns the empty string and sets `ERRNO'. 
+`BEGIN { PROCINFO["readfile"] = 1 }' + In addition, the extension adds an input parser that is activated + if `PROCINFO["readfile"]' exists. When activated, each input file + is returned in its entirety as `$0'. `RT' is set to the null + string. + Here is an example: @load "readfile" @@ -32421,7 +32428,7 @@ Index * readdir extension: Extension Sample Readdir. (line 9) * readfile() extension function: Extension Sample Readfile. - (line 11) + (line 12) * readfile() user-defined function: Readfile Function. (line 30) * reading input files: Reading Files. (line 6) * recipe for a programming language: History. (line 6) @@ -33536,78 +33543,78 @@ Node: Extension Sample Revout994851 Node: Extension Sample Rev2way995444 Node: Extension Sample Read write array996134 Node: Extension Sample Readfile998017 -Node: Extension Sample API Tests998835 -Node: Extension Sample Time999360 -Node: gawkextlib1000724 -Node: Language History1003505 -Node: V7/SVR3.11005098 -Node: SVR41007418 -Node: POSIX1008860 -Node: BTL1010246 -Node: POSIX/GNU1010980 -Node: Feature History1016579 -Node: Common Extensions1029555 -Node: Ranges and Locales1030867 -Ref: Ranges and Locales-Footnote-11035484 -Ref: Ranges and Locales-Footnote-21035511 -Ref: Ranges and Locales-Footnote-31035745 -Node: Contributors1035966 -Node: Installation1041347 -Node: Gawk Distribution1042241 -Node: Getting1042725 -Node: Extracting1043551 -Node: Distribution contents1045243 -Node: Unix Installation1050948 -Node: Quick Installation1051565 -Node: Additional Configuration Options1054011 -Node: Configuration Philosophy1055747 -Node: Non-Unix Installation1058101 -Node: PC Installation1058559 -Node: PC Binary Installation1059858 -Node: PC Compiling1061706 -Node: PC Testing1064650 -Node: PC Using1065826 -Node: Cygwin1069994 -Node: MSYS1070803 -Node: VMS Installation1071317 -Node: VMS Compilation1072081 -Ref: VMS Compilation-Footnote-11073333 -Node: VMS Dynamic Extensions1073391 -Node: VMS Installation Details1074764 -Node: VMS 
Running1077015 -Node: VMS GNV1079849 -Node: VMS Old Gawk1080572 -Node: Bugs1081042 -Node: Other Versions1084960 -Node: Notes1091044 -Node: Compatibility Mode1091844 -Node: Additions1092627 -Node: Accessing The Source1093554 -Node: Adding Code1094994 -Node: New Ports1101039 -Node: Derived Files1105174 -Ref: Derived Files-Footnote-11110495 -Ref: Derived Files-Footnote-21110529 -Ref: Derived Files-Footnote-31111129 -Node: Future Extensions1111227 -Node: Implementation Limitations1111810 -Node: Extension Design1113062 -Node: Old Extension Problems1114216 -Ref: Old Extension Problems-Footnote-11115724 -Node: Extension New Mechanism Goals1115781 -Ref: Extension New Mechanism Goals-Footnote-11119146 -Node: Extension Other Design Decisions1119332 -Node: Extension Future Growth1121438 -Node: Old Extension Mechanism1122274 -Node: Basic Concepts1124014 -Node: Basic High Level1124695 -Ref: figure-general-flow1124966 -Ref: figure-process-flow1125565 -Ref: Basic High Level-Footnote-11128794 -Node: Basic Data Typing1128979 -Node: Glossary1132334 -Node: Copying1157563 -Node: GNU Free Documentation License1195120 -Node: Index1220257 +Node: Extension Sample API Tests999117 +Node: Extension Sample Time999642 +Node: gawkextlib1001006 +Node: Language History1003787 +Node: V7/SVR3.11005380 +Node: SVR41007700 +Node: POSIX1009142 +Node: BTL1010528 +Node: POSIX/GNU1011262 +Node: Feature History1016861 +Node: Common Extensions1029837 +Node: Ranges and Locales1031149 +Ref: Ranges and Locales-Footnote-11035766 +Ref: Ranges and Locales-Footnote-21035793 +Ref: Ranges and Locales-Footnote-31036027 +Node: Contributors1036248 +Node: Installation1041629 +Node: Gawk Distribution1042523 +Node: Getting1043007 +Node: Extracting1043833 +Node: Distribution contents1045525 +Node: Unix Installation1051230 +Node: Quick Installation1051847 +Node: Additional Configuration Options1054293 +Node: Configuration Philosophy1056029 +Node: Non-Unix Installation1058383 +Node: PC Installation1058841 +Node: PC Binary 
Installation1060140 +Node: PC Compiling1061988 +Node: PC Testing1064932 +Node: PC Using1066108 +Node: Cygwin1070276 +Node: MSYS1071085 +Node: VMS Installation1071599 +Node: VMS Compilation1072363 +Ref: VMS Compilation-Footnote-11073615 +Node: VMS Dynamic Extensions1073673 +Node: VMS Installation Details1075046 +Node: VMS Running1077297 +Node: VMS GNV1080131 +Node: VMS Old Gawk1080854 +Node: Bugs1081324 +Node: Other Versions1085242 +Node: Notes1091326 +Node: Compatibility Mode1092126 +Node: Additions1092909 +Node: Accessing The Source1093836 +Node: Adding Code1095276 +Node: New Ports1101321 +Node: Derived Files1105456 +Ref: Derived Files-Footnote-11110777 +Ref: Derived Files-Footnote-21110811 +Ref: Derived Files-Footnote-31111411 +Node: Future Extensions1111509 +Node: Implementation Limitations1112092 +Node: Extension Design1113344 +Node: Old Extension Problems1114498 +Ref: Old Extension Problems-Footnote-11116006 +Node: Extension New Mechanism Goals1116063 +Ref: Extension New Mechanism Goals-Footnote-11119428 +Node: Extension Other Design Decisions1119614 +Node: Extension Future Growth1121720 +Node: Old Extension Mechanism1122556 +Node: Basic Concepts1124296 +Node: Basic High Level1124977 +Ref: figure-general-flow1125248 +Ref: figure-process-flow1125847 +Ref: Basic High Level-Footnote-11129076 +Node: Basic Data Typing1129261 +Node: Glossary1132616 +Node: Copying1157845 +Node: GNU Free Documentation License1195402 +Node: Index1220539  End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 6c41c99a..139af5f7 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -33173,7 +33173,7 @@ ret = reada("arraydump.bin", array) @subsection Reading An Entire File The @code{readfile} extension adds a single function -named @code{readfile()}: +named @code{readfile()}, and an input parser: @table @code @item @@load "readfile" @@ -33184,6 +33184,12 @@ This is how you load the extension. The argument is the name of the file to read. 
The return value is a string containing the entire contents of the requested file. Upon error, the function returns the empty string and sets @code{ERRNO}. + +@item BEGIN @{ PROCINFO["readfile"] = 1 @} +In addition, the extension adds an input parser that is activated if +@code{PROCINFO["readfile"]} exists. +When activated, each input file is returned in its entirety as @code{$0}. +@code{RT} is set to the null string. @end table Here is an example: diff --git a/doc/gawktexi.in b/doc/gawktexi.in index db949be2..dfdf3434 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -32314,7 +32314,7 @@ ret = reada("arraydump.bin", array) @subsection Reading An Entire File The @code{readfile} extension adds a single function -named @code{readfile()}: +named @code{readfile()}, and an input parser: @table @code @item @@load "readfile" @@ -32325,6 +32325,12 @@ This is how you load the extension. The argument is the name of the file to read. The return value is a string containing the entire contents of the requested file. Upon error, the function returns the empty string and sets @code{ERRNO}. + +@item BEGIN @{ PROCINFO["readfile"] = 1 @} +In addition, the extension adds an input parser that is activated if +@code{PROCINFO["readfile"]} exists. +When activated, each input file is returned in its entirety as @code{$0}. +@code{RT} is set to the null string. @end table Here is an example: diff --git a/extension/ChangeLog b/extension/ChangeLog index 61381171..02c26f3d 100644 --- a/extension/ChangeLog +++ b/extension/ChangeLog @@ -1,3 +1,9 @@ +2014-03-27 Arnold D. Robbins + + * readfile.c: Add an input parser that works off of + PROCINFO["readfile"]. + * readfile.3am: Document same. + 2014-03-23 Arnold D. Robbins * gawkfts.c (MAXPATHLEN): Add a default definition. 
Thanks to diff --git a/extension/readfile.3am b/extension/readfile.3am index 688d9cd5..0cb2eb5b 100644 --- a/extension/readfile.3am +++ b/extension/readfile.3am @@ -1,4 +1,4 @@ -.TH READFILE 3am "Jan 15 2013" "Free Software Foundation" "GNU Awk Extension Modules" +.TH READFILE 3am "Mar 24 2014" "Free Software Foundation" "GNU Awk Extension Modules" .SH NAME readfile \- return the entire contents of a file as a string .SH SYNOPSIS @@ -6,6 +6,14 @@ readfile \- return the entire contents of a file as a string @load "readfile" .sp result = readfile("/some/path") +.sp +.ft R +For making whole files be single records: +.sp +.ft CW +@load "readfile" +.br +BEGIN { PROCINFO["readfile"] = 1 } .ft R .SH DESCRIPTION The @@ -18,6 +26,14 @@ the requested file. .PP Upon error, the function returns the empty string and sets .BR ERRNO . +.PP +In addition, it adds an input parser that is activated if +.ft CW +PROCINFO["readfile"] +.ft R +exists. +When activated, each input file is returned in its entirety as \f(CW$0\fR. +\f(CWRT\fP is set to the null string. ... .SH NOTES ... .SH BUGS .SH EXAMPLE @@ -47,7 +63,7 @@ if (contents == "" && ERRNO != "") { Arnold Robbins, .BR arnold@skeeve.com . .SH COPYING PERMISSIONS -Copyright \(co 2012, 2013, +Copyright \(co 2012, 2013, 2014, Free Software Foundation, Inc. 
.PP Permission is granted to make and distribute verbatim copies of diff --git a/extension/readfile.c b/extension/readfile.c index 71d67ee6..67fa9eca 100644 --- a/extension/readfile.c +++ b/extension/readfile.c @@ -61,11 +61,39 @@ static const gawk_api_t *api; /* for convenience macros to work */ static awk_ext_id_t *ext_id; -static const char *ext_version = "readfile extension: version 1.0"; -static awk_bool_t (*init_func)(void) = NULL; +static const char *ext_version = "readfile extension: version 2.0"; +static awk_bool_t init_readfile(); +static awk_bool_t (*init_func)(void) = init_readfile; int plugin_is_GPL_compatible; +/* read_file_to_buffer --- handle the mechanics of reading the file */ + +static char * +read_file_to_buffer(int fd, const struct stat *sbuf) +{ + char *text = NULL; + int ret; + + if ((sbuf->st_mode & S_IFMT) != S_IFREG) { + errno = EINVAL; + update_ERRNO_int(errno); + goto done; + } + + emalloc(text, char *, sbuf->st_size + 2, "do_readfile"); + memset(text, '\0', sbuf->st_size + 2); + + if ((ret = read(fd, text, sbuf->st_size)) != sbuf->st_size) { + update_ERRNO_int(errno); + gawk_free(text); + text = NULL; + /* fall through to return */ + } +done: + return text; +} + /* do_readfile --- read a file into memory */ static awk_value_t * @@ -90,10 +118,6 @@ do_readfile(int nargs, awk_value_t *result) if (ret < 0) { update_ERRNO_int(errno); goto done; - } else if ((sbuf.st_mode & S_IFMT) != S_IFREG) { - errno = EINVAL; - update_ERRNO_int(errno); - goto done; } if ((fd = open(filename.str_value.str, O_RDONLY|O_BINARY)) < 0) { @@ -101,15 +125,9 @@ do_readfile(int nargs, awk_value_t *result) goto done; } - emalloc(text, char *, sbuf.st_size + 2, "do_readfile"); - memset(text, '\0', sbuf.st_size + 2); - - if ((ret = read(fd, text, sbuf.st_size)) != sbuf.st_size) { - (void) close(fd); - update_ERRNO_int(errno); - gawk_free(text); - goto done; - } + text = read_file_to_buffer(fd, & sbuf); + if (text == NULL) + goto done; /* ERRNO already updated */ 
close(fd); make_malloced_string(text, sbuf.st_size, result); @@ -117,12 +135,110 @@ do_readfile(int nargs, awk_value_t *result) } else if (do_lint) lintwarn(ext_id, _("readfile: called with no arguments")); - done: /* Set the return value */ return result; } +/* readfile_get_record --- read the whole file as one record */ + +static int +readfile_get_record(char **out, awk_input_buf_t *iobuf, int *errcode, + char **rt_start, size_t *rt_len) +{ + char *text; + + /* + * The caller sets *errcode to 0, so we should set it only if an + * error occurs. + */ + + if (out == NULL || iobuf == NULL) + return EOF; + + if (iobuf->opaque != NULL) { + /* + * Already read the whole file, + * free up stuff and return EOF + */ + gawk_free(iobuf->opaque); + iobuf->opaque = NULL; + return EOF; + } + + /* read file */ + text = read_file_to_buffer(iobuf->fd, & iobuf->sbuf); + if (text == NULL) + return EOF; + + /* set up the iobuf for next time */ + iobuf->opaque = text; + + /* set return values */ + *rt_start = NULL; + *rt_len = 0; + *out = text; + + /* return count */ + return iobuf->sbuf.st_size; +} + +/* readfile_can_take_file --- return true if we want the file */ + +static awk_bool_t +readfile_can_take_file(const awk_input_buf_t *iobuf) +{ + awk_value_t array, index, value; + + if (iobuf == NULL) + return awk_false; + + /* + * This could fail if PROCINFO isn't referenced from + * the awk program. It's not a "can't happen" error. + */ + if (! sym_lookup("PROCINFO", AWK_ARRAY, & array)) { + return awk_false; + } + + (void) make_const_string("readfile", 8, & index); + + if (! 
get_array_element(array.array_cookie, & index, AWK_UNDEFINED, & value)) { + return awk_false; + } + + return awk_true; +} + +/* readfile_take_control_of --- take over the file */ + +static awk_bool_t +readfile_take_control_of(awk_input_buf_t *iobuf) +{ + if (iobuf == NULL) + return awk_false; + + iobuf->get_record = readfile_get_record; + return awk_true; +} + +static awk_input_parser_t readfile_parser = { + "readfile", + readfile_can_take_file, + readfile_take_control_of, + NULL +}; + +/* init_readfile --- set things up */ + +static awk_bool_t +init_readfile() +{ + register_input_parser(& readfile_parser); + + return awk_true; +} + static awk_ext_func_t func_table[] = { { "readfile", do_readfile, 1 }, }; diff --git a/test/ChangeLog b/test/ChangeLog index ab7ff0f2..f8d9f943 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2014-03-27 Arnold D. Robbins + + * Makefile.am (readfile2): New test. + * readfile2.awk, readfile2.ok: New files. + 2014-02-28 Arnold D. Robbins * regrange.ok: Update after code improvements. diff --git a/test/Makefile.am b/test/Makefile.am index ca39e0b8..d92f358e 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -708,6 +708,7 @@ EXTRA_DIST = \ range1.ok \ readdir.awk \ readdir0.awk \ + readfile2.awk \ rebt8b1.awk \ rebt8b1.ok \ rebt8b2.awk \ @@ -1019,7 +1020,7 @@ LOCALE_CHARSET_TESTS = \ SHLIB_TESTS = \ fnmatch filefuncs fork fork2 fts functab4 inplace1 inplace2 inplace3 \ - ordchr ordchr2 readdir readfile revout revtwoway rwarray testext time + ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time # List of the tests which should be run with --lint option: NEED_LINT = \ @@ -1734,6 +1735,11 @@ readfile:: @$(AWK) -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) Makefile _$@ && rm -f _$@ || cp -p Makefile $@.ok +readfile2:: + @echo $@ + @$(AWK) -f "$(srcdir)"/$@.awk "$(srcdir)"/$@.awk "$(srcdir)"/readdir.awk > _$@ || echo EXIT CODE: $$? 
>>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + include2:: @echo $@ @AWKPATH="$(srcdir)" $(AWK) --include inclib 'BEGIN {print sandwich("a", "b", "c")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Makefile.in b/test/Makefile.in index 52fa49ef..6e1bcbb3 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -954,6 +954,7 @@ EXTRA_DIST = \ range1.ok \ readdir.awk \ readdir0.awk \ + readfile2.awk \ rebt8b1.awk \ rebt8b1.ok \ rebt8b2.awk \ @@ -1261,7 +1262,7 @@ LOCALE_CHARSET_TESTS = \ SHLIB_TESTS = \ fnmatch filefuncs fork fork2 fts functab4 inplace1 inplace2 inplace3 \ - ordchr ordchr2 readdir readfile revout revtwoway rwarray testext time + ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time # List of the tests which should be run with --lint option: @@ -2158,6 +2159,11 @@ readfile:: @$(AWK) -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) Makefile _$@ && rm -f _$@ || cp -p Makefile $@.ok +readfile2:: + @echo $@ + @$(AWK) -f "$(srcdir)"/$@.awk "$(srcdir)"/$@.awk "$(srcdir)"/readdir.awk > _$@ || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + include2:: @echo $@ @AWKPATH="$(srcdir)" $(AWK) --include inclib 'BEGIN {print sandwich("a", "b", "c")}' >_$@ 2>&1 || echo EXIT CODE: $$? 
>>_$@ diff --git a/test/readfile2.awk b/test/readfile2.awk new file mode 100644 index 00000000..c21483fc --- /dev/null +++ b/test/readfile2.awk @@ -0,0 +1,12 @@ +@load "readfile" +BEGIN { PROCINFO["readfile"] = 1 } +BEGINFILE { print "Start of", basename(FILENAME) } +{ printf ("%d: <%s>\n", FNR, $0 ) } +ENDFILE { print "End of", basename(FILENAME) } + +function basename(file, result) +{ + result = file + gsub(".*/", "", result) + return result +} diff --git a/test/readfile2.ok b/test/readfile2.ok new file mode 100644 index 00000000..be1ded6c --- /dev/null +++ b/test/readfile2.ok @@ -0,0 +1,21 @@ +Start of readfile2.awk +1: <@load "readfile" +BEGIN { PROCINFO["readfile"] = 1 } +BEGINFILE { print "Start of", basename(FILENAME) } +{ printf ("%d: <%s>\n", FNR, $0 ) } +ENDFILE { print "End of", basename(FILENAME) } + +function basename(file, result) +{ + result = file + gsub(".*/", "", result) + return result +} +> +End of readfile2.awk +Start of readdir.awk +1: <@load "readdir" + +{ print } +> +End of readdir.awk -- cgit v1.2.1