summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2014-03-27 22:49:58 +0200
committerArnold D. Robbins <arnold@skeeve.com>2014-03-27 22:49:58 +0200
commit4d396858eb897f2bb6d318b851a369ee92e50147 (patch)
treed70bd6e9fd6750d0bf67a33374c3864a7ab4ac8d
parent17cb726be0dea75864a89d31054459c02702786e (diff)
downloadgawk-4d396858eb897f2bb6d318b851a369ee92e50147.tar.gz
Add input parser to readfile extension, document it and test it.
-rw-r--r--NEWS5
-rw-r--r--doc/gawk.info157
-rw-r--r--doc/gawk.texi8
-rw-r--r--doc/gawktexi.in8
-rw-r--r--extension/ChangeLog6
-rw-r--r--extension/readfile.3am20
-rw-r--r--extension/readfile.c148
-rw-r--r--test/ChangeLog5
-rw-r--r--test/Makefile.am8
-rw-r--r--test/Makefile.in8
-rw-r--r--test/readfile2.awk12
-rw-r--r--test/readfile2.ok21
12 files changed, 308 insertions, 98 deletions
diff --git a/NEWS b/NEWS
index c6a4f216..423a46f0 100644
--- a/NEWS
+++ b/NEWS
@@ -45,7 +45,10 @@ Changes from 4.1.0 to 4.1.1
If the thousands separator is a string, it will be correctly added
to decimal numbers.
-14. A number of bugs have been fixed. See the ChangeLog.
+14. The readfile extension now has an input parser that will read whole
+ files as a single record.
+
+15. A number of bugs have been fixed. See the ChangeLog.
Changes from 4.0.2 to 4.1.0
---------------------------
diff --git a/doc/gawk.info b/doc/gawk.info
index ea2a718f..6bff2719 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -24830,7 +24830,8 @@ File: gawk.info, Node: Extension Sample Readfile, Next: Extension Sample API T
16.7.10 Reading An Entire File
------------------------------
-The `readfile' extension adds a single function named `readfile()':
+The `readfile' extension adds a single function named `readfile()', and
+an input parser:
`@load "readfile"'
This is how you load the extension.
@@ -24840,6 +24841,12 @@ The `readfile' extension adds a single function named `readfile()':
a string containing the entire contents of the requested file.
Upon error, the function returns the empty string and sets `ERRNO'.
+`BEGIN { PROCINFO["readfile"] = 1 }'
+ In addition, the extension adds an input parser that is activated
+ if `PROCINFO["readfile"]' exists. When activated, each input file
+ is returned in its entirety as `$0'. `RT' is set to the null
+ string.
+
Here is an example:
@load "readfile"
@@ -32421,7 +32428,7 @@ Index
* readdir extension: Extension Sample Readdir.
(line 9)
* readfile() extension function: Extension Sample Readfile.
- (line 11)
+ (line 12)
* readfile() user-defined function: Readfile Function. (line 30)
* reading input files: Reading Files. (line 6)
* recipe for a programming language: History. (line 6)
@@ -33536,78 +33543,78 @@ Node: Extension Sample Revout994851
Node: Extension Sample Rev2way995444
Node: Extension Sample Read write array996134
Node: Extension Sample Readfile998017
-Node: Extension Sample API Tests998835
-Node: Extension Sample Time999360
-Node: gawkextlib1000724
-Node: Language History1003505
-Node: V7/SVR3.11005098
-Node: SVR41007418
-Node: POSIX1008860
-Node: BTL1010246
-Node: POSIX/GNU1010980
-Node: Feature History1016579
-Node: Common Extensions1029555
-Node: Ranges and Locales1030867
-Ref: Ranges and Locales-Footnote-11035484
-Ref: Ranges and Locales-Footnote-21035511
-Ref: Ranges and Locales-Footnote-31035745
-Node: Contributors1035966
-Node: Installation1041347
-Node: Gawk Distribution1042241
-Node: Getting1042725
-Node: Extracting1043551
-Node: Distribution contents1045243
-Node: Unix Installation1050948
-Node: Quick Installation1051565
-Node: Additional Configuration Options1054011
-Node: Configuration Philosophy1055747
-Node: Non-Unix Installation1058101
-Node: PC Installation1058559
-Node: PC Binary Installation1059858
-Node: PC Compiling1061706
-Node: PC Testing1064650
-Node: PC Using1065826
-Node: Cygwin1069994
-Node: MSYS1070803
-Node: VMS Installation1071317
-Node: VMS Compilation1072081
-Ref: VMS Compilation-Footnote-11073333
-Node: VMS Dynamic Extensions1073391
-Node: VMS Installation Details1074764
-Node: VMS Running1077015
-Node: VMS GNV1079849
-Node: VMS Old Gawk1080572
-Node: Bugs1081042
-Node: Other Versions1084960
-Node: Notes1091044
-Node: Compatibility Mode1091844
-Node: Additions1092627
-Node: Accessing The Source1093554
-Node: Adding Code1094994
-Node: New Ports1101039
-Node: Derived Files1105174
-Ref: Derived Files-Footnote-11110495
-Ref: Derived Files-Footnote-21110529
-Ref: Derived Files-Footnote-31111129
-Node: Future Extensions1111227
-Node: Implementation Limitations1111810
-Node: Extension Design1113062
-Node: Old Extension Problems1114216
-Ref: Old Extension Problems-Footnote-11115724
-Node: Extension New Mechanism Goals1115781
-Ref: Extension New Mechanism Goals-Footnote-11119146
-Node: Extension Other Design Decisions1119332
-Node: Extension Future Growth1121438
-Node: Old Extension Mechanism1122274
-Node: Basic Concepts1124014
-Node: Basic High Level1124695
-Ref: figure-general-flow1124966
-Ref: figure-process-flow1125565
-Ref: Basic High Level-Footnote-11128794
-Node: Basic Data Typing1128979
-Node: Glossary1132334
-Node: Copying1157563
-Node: GNU Free Documentation License1195120
-Node: Index1220257
+Node: Extension Sample API Tests999117
+Node: Extension Sample Time999642
+Node: gawkextlib1001006
+Node: Language History1003787
+Node: V7/SVR3.11005380
+Node: SVR41007700
+Node: POSIX1009142
+Node: BTL1010528
+Node: POSIX/GNU1011262
+Node: Feature History1016861
+Node: Common Extensions1029837
+Node: Ranges and Locales1031149
+Ref: Ranges and Locales-Footnote-11035766
+Ref: Ranges and Locales-Footnote-21035793
+Ref: Ranges and Locales-Footnote-31036027
+Node: Contributors1036248
+Node: Installation1041629
+Node: Gawk Distribution1042523
+Node: Getting1043007
+Node: Extracting1043833
+Node: Distribution contents1045525
+Node: Unix Installation1051230
+Node: Quick Installation1051847
+Node: Additional Configuration Options1054293
+Node: Configuration Philosophy1056029
+Node: Non-Unix Installation1058383
+Node: PC Installation1058841
+Node: PC Binary Installation1060140
+Node: PC Compiling1061988
+Node: PC Testing1064932
+Node: PC Using1066108
+Node: Cygwin1070276
+Node: MSYS1071085
+Node: VMS Installation1071599
+Node: VMS Compilation1072363
+Ref: VMS Compilation-Footnote-11073615
+Node: VMS Dynamic Extensions1073673
+Node: VMS Installation Details1075046
+Node: VMS Running1077297
+Node: VMS GNV1080131
+Node: VMS Old Gawk1080854
+Node: Bugs1081324
+Node: Other Versions1085242
+Node: Notes1091326
+Node: Compatibility Mode1092126
+Node: Additions1092909
+Node: Accessing The Source1093836
+Node: Adding Code1095276
+Node: New Ports1101321
+Node: Derived Files1105456
+Ref: Derived Files-Footnote-11110777
+Ref: Derived Files-Footnote-21110811
+Ref: Derived Files-Footnote-31111411
+Node: Future Extensions1111509
+Node: Implementation Limitations1112092
+Node: Extension Design1113344
+Node: Old Extension Problems1114498
+Ref: Old Extension Problems-Footnote-11116006
+Node: Extension New Mechanism Goals1116063
+Ref: Extension New Mechanism Goals-Footnote-11119428
+Node: Extension Other Design Decisions1119614
+Node: Extension Future Growth1121720
+Node: Old Extension Mechanism1122556
+Node: Basic Concepts1124296
+Node: Basic High Level1124977
+Ref: figure-general-flow1125248
+Ref: figure-process-flow1125847
+Ref: Basic High Level-Footnote-11129076
+Node: Basic Data Typing1129261
+Node: Glossary1132616
+Node: Copying1157845
+Node: GNU Free Documentation License1195402
+Node: Index1220539

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 6c41c99a..139af5f7 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -33173,7 +33173,7 @@ ret = reada("arraydump.bin", array)
@subsection Reading An Entire File
The @code{readfile} extension adds a single function
-named @code{readfile()}:
+named @code{readfile()}, and an input parser:
@table @code
@item @@load "readfile"
@@ -33184,6 +33184,12 @@ This is how you load the extension.
The argument is the name of the file to read. The return value is a
string containing the entire contents of the requested file. Upon error,
the function returns the empty string and sets @code{ERRNO}.
+
+@item BEGIN @{ PROCINFO["readfile"] = 1 @}
+In addition, the extension adds an input parser that is activated if
+@code{PROCINFO["readfile"]} exists.
+When activated, each input file is returned in its entirety as @code{$0}.
+@code{RT} is set to the null string.
@end table
Here is an example:
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index db949be2..dfdf3434 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -32314,7 +32314,7 @@ ret = reada("arraydump.bin", array)
@subsection Reading An Entire File
The @code{readfile} extension adds a single function
-named @code{readfile()}:
+named @code{readfile()}, and an input parser:
@table @code
@item @@load "readfile"
@@ -32325,6 +32325,12 @@ This is how you load the extension.
The argument is the name of the file to read. The return value is a
string containing the entire contents of the requested file. Upon error,
the function returns the empty string and sets @code{ERRNO}.
+
+@item BEGIN @{ PROCINFO["readfile"] = 1 @}
+In addition, the extension adds an input parser that is activated if
+@code{PROCINFO["readfile"]} exists.
+When activated, each input file is returned in its entirety as @code{$0}.
+@code{RT} is set to the null string.
@end table
Here is an example:
diff --git a/extension/ChangeLog b/extension/ChangeLog
index 61381171..02c26f3d 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -1,3 +1,9 @@
+2014-03-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * readfile.c: Add an input parser that works off of
+ PROCINFO["readfile"].
+ * readfile.3am: Document same.
+
2014-03-23 Arnold D. Robbins <arnold@skeeve.com>
* gawkfts.c (MAXPATHLEN): Add a default definition. Thanks to
diff --git a/extension/readfile.3am b/extension/readfile.3am
index 688d9cd5..0cb2eb5b 100644
--- a/extension/readfile.3am
+++ b/extension/readfile.3am
@@ -1,4 +1,4 @@
-.TH READFILE 3am "Jan 15 2013" "Free Software Foundation" "GNU Awk Extension Modules"
+.TH READFILE 3am "Mar 24 2014" "Free Software Foundation" "GNU Awk Extension Modules"
.SH NAME
readfile \- return the entire contents of a file as a string
.SH SYNOPSIS
@@ -6,6 +6,14 @@ readfile \- return the entire contents of a file as a string
@load "readfile"
.sp
result = readfile("/some/path")
+.sp
+.ft R
+For making whole files be single records:
+.sp
+.ft CW
+@load "readfile"
+.br
+BEGIN { PROCINFO["readfile"] = 1 }
.ft R
.SH DESCRIPTION
The
@@ -18,6 +26,14 @@ the requested file.
.PP
Upon error, the function returns the empty string and sets
.BR ERRNO .
+.PP
+In addition, it adds an input parser that is activated if
+.ft CW
+PROCINFO["readfile"]
+.ft R
+exists.
+When activated, each input file is returned in its entirety as \f(CW$0\fR.
+\f(CWRT\fP is set to the null string.
... .SH NOTES
... .SH BUGS
.SH EXAMPLE
@@ -47,7 +63,7 @@ if (contents == "" && ERRNO != "") {
Arnold Robbins,
.BR arnold@skeeve.com .
.SH COPYING PERMISSIONS
-Copyright \(co 2012, 2013,
+Copyright \(co 2012, 2013, 2014,
Free Software Foundation, Inc.
.PP
Permission is granted to make and distribute verbatim copies of
diff --git a/extension/readfile.c b/extension/readfile.c
index 71d67ee6..67fa9eca 100644
--- a/extension/readfile.c
+++ b/extension/readfile.c
@@ -61,11 +61,39 @@
static const gawk_api_t *api; /* for convenience macros to work */
static awk_ext_id_t *ext_id;
-static const char *ext_version = "readfile extension: version 1.0";
-static awk_bool_t (*init_func)(void) = NULL;
+static const char *ext_version = "readfile extension: version 2.0";
+
+/*
+ * Forward declaration: init_readfile() is defined further down, after the
+ * input parser it registers.  It is declared with a (void) prototype so
+ * that its type matches init_func exactly instead of relying on an
+ * old-style empty parameter list.
+ */
+static awk_bool_t init_readfile(void);
+static awk_bool_t (*init_func)(void) = init_readfile;
int plugin_is_GPL_compatible;
+/* read_file_to_buffer --- handle the mechanics of reading the file */
+
+/*
+ * Read the contents of the already-open descriptor FD into a freshly
+ * allocated buffer.  SBUF must hold the stat data for FD; its st_size
+ * field decides how many bytes are allocated and read.
+ *
+ * Only regular files are accepted; anything else fails with EINVAL.
+ * The buffer is st_size + 2 bytes long and is zero-filled before the
+ * read, so the data is always followed by NUL bytes.
+ *
+ * Returns the buffer on success; ownership passes to the caller.
+ * Returns NULL on failure, after the error has been recorded with
+ * update_ERRNO_int().  FD is never closed here, on either path.
+ */
+
+static char *
+read_file_to_buffer(int fd, const struct stat *sbuf)
+{
+	char *text = NULL;
+
+	if ((sbuf->st_mode & S_IFMT) != S_IFREG) {
+		errno = EINVAL;
+		update_ERRNO_int(errno);
+		goto done;
+	}
+
+	emalloc(text, char *, sbuf->st_size + 2, "read_file_to_buffer");
+	memset(text, '\0', sbuf->st_size + 2);
+
+	/*
+	 * Compare read()'s result with st_size directly.  Storing it in
+	 * an int first would truncate the count for very large files and
+	 * turn a complete read into a spurious failure.  Anything other
+	 * than a complete read counts as an error.
+	 */
+	if (read(fd, text, sbuf->st_size) != sbuf->st_size) {
+		update_ERRNO_int(errno);
+		gawk_free(text);
+		text = NULL;
+		/* fall through to return */
+	}
+done:
+	return text;
+}
+
/* do_readfile --- read a file into memory */
static awk_value_t *
@@ -90,10 +118,6 @@ do_readfile(int nargs, awk_value_t *result)
if (ret < 0) {
update_ERRNO_int(errno);
goto done;
- } else if ((sbuf.st_mode & S_IFMT) != S_IFREG) {
- errno = EINVAL;
- update_ERRNO_int(errno);
- goto done;
}
if ((fd = open(filename.str_value.str, O_RDONLY|O_BINARY)) < 0) {
@@ -101,15 +125,9 @@ do_readfile(int nargs, awk_value_t *result)
goto done;
}
- emalloc(text, char *, sbuf.st_size + 2, "do_readfile");
- memset(text, '\0', sbuf.st_size + 2);
-
- if ((ret = read(fd, text, sbuf.st_size)) != sbuf.st_size) {
- (void) close(fd);
- update_ERRNO_int(errno);
- gawk_free(text);
- goto done;
- }
+		text = read_file_to_buffer(fd, & sbuf);
+		if (text == NULL) {
+			/*
+			 * read_file_to_buffer() leaves fd open, so close it
+			 * here before bailing out; otherwise the descriptor
+			 * leaks on every failed read.
+			 */
+			(void) close(fd);
+			goto done; /* ERRNO already updated */
+		}
close(fd);
make_malloced_string(text, sbuf.st_size, result);
@@ -117,12 +135,110 @@ do_readfile(int nargs, awk_value_t *result)
} else if (do_lint)
lintwarn(ext_id, _("readfile: called with no arguments"));
-
done:
/* Set the return value */
return result;
}
+/* readfile_get_record --- read the whole file as one record */
+
+/*
+ * The first call for a given iobuf reads the file and hands it back as a
+ * single record.  The buffer is parked in iobuf->opaque so that the next
+ * call can free it and report EOF.
+ */
+
+static int
+readfile_get_record(char **out, awk_input_buf_t *iobuf, int *errcode,
+			char **rt_start, size_t *rt_len)
+{
+	char *contents;
+
+	/*
+	 * *errcode is left alone: the caller zeroes it beforehand and it
+	 * is only meant to change when an error is being reported.
+	 */
+
+	if (iobuf == NULL || out == NULL)
+		return EOF;
+
+	if (iobuf->opaque == NULL) {
+		/* first call: pull in the entire file */
+		contents = read_file_to_buffer(iobuf->fd, & iobuf->sbuf);
+		if (contents == NULL)
+			return EOF;
+
+		/* remember the buffer so the next call can release it */
+		iobuf->opaque = contents;
+
+		/* there is no record terminator text */
+		*rt_start = NULL;
+		*rt_len = 0;
+		*out = contents;
+
+		/* the record length is the file size */
+		return iobuf->sbuf.st_size;
+	}
+
+	/* later call: the one record was already delivered */
+	gawk_free(iobuf->opaque);
+	iobuf->opaque = NULL;
+	return EOF;
+}
+
+/* readfile_can_take_file --- return true if we want the file */
+
+/*
+ * The parser claims a file exactly when the PROCINFO array can be looked
+ * up and it has an element with the subscript "readfile".  The value of
+ * that element is not examined.
+ */
+
+static awk_bool_t
+readfile_can_take_file(const awk_input_buf_t *iobuf)
+{
+	awk_value_t procinfo, subscript, element;
+
+	if (iobuf == NULL)
+		return awk_false;
+
+	/*
+	 * The lookup is allowed to fail: PROCINFO may not be available
+	 * when the awk program never refers to it.
+	 */
+	if (! sym_lookup("PROCINFO", AWK_ARRAY, & procinfo))
+		return awk_false;
+
+	(void) make_const_string("readfile", 8, & subscript);
+
+	if (get_array_element(procinfo.array_cookie, & subscript, AWK_UNDEFINED, & element))
+		return awk_true;
+
+	return awk_false;
+}
+
+/* readfile_take_control_of --- take over the file */
+
+/*
+ * Install readfile_get_record() as the record reader for this input.
+ * Returns awk_false, changing nothing, when no iobuf is supplied.
+ */
+
+static awk_bool_t
+readfile_take_control_of(awk_input_buf_t *iobuf)
+{
+	if (iobuf != NULL) {
+		iobuf->get_record = readfile_get_record;
+		return awk_true;
+	}
+
+	return awk_false;
+}
+
+/*
+ * readfile_parser --- the descriptor that init_readfile() passes to
+ * register_input_parser().  It carries the string "readfile" and the
+ * two callbacks defined above.  NOTE(review): the trailing NULL is
+ * presumably a link field that gawk fills in itself; confirm against
+ * awk_input_parser_t in gawkapi.h.
+ */
+
+static awk_input_parser_t readfile_parser = {
+ "readfile",
+ readfile_can_take_file,
+ readfile_take_control_of,
+ NULL
+};
+
+/* init_readfile --- set things up */
+
+/*
+ * Registers readfile_parser with gawk through register_input_parser()
+ * and always reports success.  Defined with a (void) parameter list so
+ * the definition is a true prototype matching the init_func pointer type.
+ */
+
+static awk_bool_t
+init_readfile(void)
+{
+	register_input_parser(& readfile_parser);
+
+	return awk_true;
+}
+
static awk_ext_func_t func_table[] = {
{ "readfile", do_readfile, 1 },
};
diff --git a/test/ChangeLog b/test/ChangeLog
index ab7ff0f2..f8d9f943 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (readfile2): New test.
+ * readfile2.awk, readfile2.ok: New files.
+
2014-02-28 Arnold D. Robbins <arnold@skeeve.com>
* regrange.ok: Update after code improvements.
diff --git a/test/Makefile.am b/test/Makefile.am
index ca39e0b8..d92f358e 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -708,6 +708,7 @@ EXTRA_DIST = \
range1.ok \
readdir.awk \
readdir0.awk \
+	readfile2.awk \
+	readfile2.ok \
rebt8b1.awk \
rebt8b1.ok \
rebt8b2.awk \
@@ -1019,7 +1020,7 @@ LOCALE_CHARSET_TESTS = \
SHLIB_TESTS = \
fnmatch filefuncs fork fork2 fts functab4 inplace1 inplace2 inplace3 \
- ordchr ordchr2 readdir readfile revout revtwoway rwarray testext time
+ ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time
# List of the tests which should be run with --lint option:
NEED_LINT = \
@@ -1734,6 +1735,11 @@ readfile::
@$(AWK) -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) Makefile _$@ && rm -f _$@ || cp -p Makefile $@.ok
+# readfile2: run readfile2.awk (which loads the readfile extension and sets
+# PROCINFO["readfile"]) over two input files and compare the result with
+# readfile2.ok.  stderr is captured too, as the neighbouring tests do, so
+# that diagnostics end up in the compared output rather than only on the
+# terminal.
+readfile2::
+	@echo $@
+	@$(AWK) -f "$(srcdir)"/$@.awk "$(srcdir)"/$@.awk "$(srcdir)"/readdir.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
include2::
@echo $@
@AWKPATH="$(srcdir)" $(AWK) --include inclib 'BEGIN {print sandwich("a", "b", "c")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Makefile.in b/test/Makefile.in
index 52fa49ef..6e1bcbb3 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -954,6 +954,7 @@ EXTRA_DIST = \
range1.ok \
readdir.awk \
readdir0.awk \
+	readfile2.awk \
+	readfile2.ok \
rebt8b1.awk \
rebt8b1.ok \
rebt8b2.awk \
@@ -1261,7 +1262,7 @@ LOCALE_CHARSET_TESTS = \
SHLIB_TESTS = \
fnmatch filefuncs fork fork2 fts functab4 inplace1 inplace2 inplace3 \
- ordchr ordchr2 readdir readfile revout revtwoway rwarray testext time
+ ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time
# List of the tests which should be run with --lint option:
@@ -2158,6 +2159,11 @@ readfile::
@$(AWK) -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) Makefile _$@ && rm -f _$@ || cp -p Makefile $@.ok
+# readfile2: run readfile2.awk (which loads the readfile extension and sets
+# PROCINFO["readfile"]) over two input files and compare the result with
+# readfile2.ok.  stderr is captured too, as the neighbouring tests do, so
+# that diagnostics end up in the compared output rather than only on the
+# terminal.
+readfile2::
+	@echo $@
+	@$(AWK) -f "$(srcdir)"/$@.awk "$(srcdir)"/$@.awk "$(srcdir)"/readdir.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+	@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
include2::
@echo $@
@AWKPATH="$(srcdir)" $(AWK) --include inclib 'BEGIN {print sandwich("a", "b", "c")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/readfile2.awk b/test/readfile2.awk
new file mode 100644
index 00000000..c21483fc
--- /dev/null
+++ b/test/readfile2.awk
@@ -0,0 +1,12 @@
+@load "readfile"
+BEGIN { PROCINFO["readfile"] = 1 }
+BEGINFILE { print "Start of", basename(FILENAME) }
+{ printf ("%d: <%s>\n", FNR, $0 ) }
+ENDFILE { print "End of", basename(FILENAME) }
+
+function basename(file, result)
+{
+ result = file
+ gsub(".*/", "", result)
+ return result
+}
diff --git a/test/readfile2.ok b/test/readfile2.ok
new file mode 100644
index 00000000..be1ded6c
--- /dev/null
+++ b/test/readfile2.ok
@@ -0,0 +1,21 @@
+Start of readfile2.awk
+1: <@load "readfile"
+BEGIN { PROCINFO["readfile"] = 1 }
+BEGINFILE { print "Start of", basename(FILENAME) }
+{ printf ("%d: <%s>\n", FNR, $0 ) }
+ENDFILE { print "End of", basename(FILENAME) }
+
+function basename(file, result)
+{
+ result = file
+ gsub(".*/", "", result)
+ return result
+}
+>
+End of readfile2.awk
+Start of readdir.awk
+1: <@load "readdir"
+
+{ print }
+>
+End of readdir.awk