summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--awk.h1
-rw-r--r--doc/ChangeLog4
-rw-r--r--doc/gawk.18
-rw-r--r--doc/gawk.info742
-rw-r--r--doc/gawk.texi10
-rw-r--r--doc/gawktexi.in10
-rw-r--r--field.c249
-rw-r--r--test/ChangeLog7
-rw-r--r--test/Makefile.am7
-rw-r--r--test/Makefile.in12
-rw-r--r--test/Maketests5
-rw-r--r--test/fpat6.awk8
-rw-r--r--test/fpat6.in13
-rw-r--r--test/fpat6.ok44
-rw-r--r--test/patsplit.ok3
16 files changed, 581 insertions, 550 deletions
diff --git a/ChangeLog b/ChangeLog
index d9380ca2..04091617 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,14 @@
* gawkapi.c (awk_value_to_node): Initialize ext_ret_val to NULL
to avoid compiler warnings.
+2017-04-12 Manuel Collado <m-collado@users.sourceforge.net>
+
+ Fix the FPAT bug reported by Ed Morton in the gawk-bug mailing list.
+
+ * awk.h (Regexp): Remove the non_empty flag.
+ * field.c (fpat_parse_field): Restructure the code to reduce complexity
+ and document the new structure.
+
2017-04-10 Andrew J. Schorr <aschorr@telemetry-investments.com>
* awk.h (enum opcodeval): For the avoidance of doubt, specify that
diff --git a/awk.h b/awk.h
index 1d6ddb37..0ab47914 100644
--- a/awk.h
+++ b/awk.h
@@ -210,7 +210,6 @@ typedef struct Regexp {
struct re_pattern_buffer pat;
struct re_registers regs;
struct dfa *dfareg;
- bool non_empty; /* for use in fpat_parse_field */
bool has_meta; /* re has meta chars so (probably) isn't simple string */
bool maybe_long; /* re has meta chars that can match long text */
} Regexp;
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 82ae2ce3..61d09e28 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2017-04-12 Manuel Collado <m-collado@users.sourceforge.net>
+
+ * gawktexi.in, gawk.1: Small clarification of the patsplit behavior.
+
2017-04-11 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Minor style edits.
diff --git a/doc/gawk.1 b/doc/gawk.1
index a4f691d6..b04cb013 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -2977,9 +2977,11 @@ that matched
.IR r .
The value of
.BI seps[ i ]
-is the separator that appeared in
-front of
-.BI a[ i +1]\fR.
+is the possibly null separator that appeared after
+.BI a[ i ]\fR.
+The value of
+.B seps[0]
+is the possibly null leading separator.
\&\fRIf
.I r
is omitted,
diff --git a/doc/gawk.info b/doc/gawk.info
index 42c3c197..d3ae5639 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -12670,16 +12670,18 @@ Options::):
fatal error.
'patsplit(STRING, ARRAY' [', FIELDPAT' [', SEPS' ] ]') #'
- Divide STRING into pieces defined by FIELDPAT and store the pieces
- in ARRAY and the separator strings in the SEPS array. The first
- piece is stored in 'ARRAY[1]', the second piece in 'ARRAY[2]', and
- so forth. The third argument, FIELDPAT, is a regexp describing the
- fields in STRING (just as 'FPAT' is a regexp describing the fields
- in input records). It may be either a regexp constant or a string.
- If FIELDPAT is omitted, the value of 'FPAT' is used. 'patsplit()'
- returns the number of elements created. 'SEPS[I]' is the separator
- string between 'ARRAY[I]' and 'ARRAY[I+1]'. Any leading separator
- will be in 'SEPS[0]'.
+ Divide STRING into pieces (or "fields") defined by FIELDPAT and
+ store the pieces in ARRAY and the separator strings in the SEPS
+ array. The first piece is stored in 'ARRAY[1]', the second piece
+ in 'ARRAY[2]', and so forth. The third argument, FIELDPAT, is a
+ regexp describing the fields in STRING (just as 'FPAT' is a regexp
+ describing the fields in input records). It may be either a regexp
+ constant or a string. If FIELDPAT is omitted, the value of 'FPAT'
+ is used. 'patsplit()' returns the number of elements created.
+ 'SEPS[I]' is the possibly null separator string after 'ARRAY[I]'.
+ The possibly null leading separator will be in 'SEPS[0]'. So a
+ non-null STRING with N fields will have N+1 separators. A null
+ STRING will have neither fields nor separators.
The 'patsplit()' function splits strings into pieces in a manner
similar to the way input lines are split into fields using 'FPAT'
@@ -32477,7 +32479,7 @@ Index
* * (asterisk), * operator, as regexp operator: Regexp Operators.
(line 89)
* * (asterisk), * operator, null strings, matching: String Functions.
- (line 537)
+ (line 539)
* * (asterisk), ** operator: Arithmetic Ops. (line 81)
* * (asterisk), ** operator <1>: Precedence. (line 48)
* * (asterisk), **= operator: Assignment Ops. (line 129)
@@ -32871,7 +32873,7 @@ Index
* asterisk (*), * operator, as regexp operator: Regexp Operators.
(line 89)
* asterisk (*), * operator, null strings, matching: String Functions.
- (line 537)
+ (line 539)
* asterisk (*), ** operator: Arithmetic Ops. (line 81)
* asterisk (*), ** operator <1>: Precedence. (line 48)
* asterisk (*), **= operator: Assignment Ops. (line 129)
@@ -33094,7 +33096,7 @@ Index
* Brian Kernighan's awk <8>: Continue Statement. (line 44)
* Brian Kernighan's awk <9>: Nextfile Statement. (line 47)
* Brian Kernighan's awk <10>: Delete. (line 51)
-* Brian Kernighan's awk <11>: String Functions. (line 493)
+* Brian Kernighan's awk <11>: String Functions. (line 495)
* Brian Kernighan's awk <12>: Gory Details. (line 19)
* Brian Kernighan's awk <13>: I/O Functions. (line 43)
* Brian Kernighan's awk, extensions: BTL. (line 6)
@@ -33137,7 +33139,7 @@ Index
* case sensitivity, and regexps: User-modified. (line 79)
* case sensitivity, and string comparisons: User-modified. (line 79)
* case sensitivity, array indices and: Array Intro. (line 100)
-* case sensitivity, converting case: String Functions. (line 523)
+* case sensitivity, converting case: String Functions. (line 525)
* case sensitivity, example programs: Library Functions. (line 53)
* case sensitivity, gawk: Case-sensitivity. (line 26)
* case sensitivity, regexps and: Case-sensitivity. (line 6)
@@ -33265,9 +33267,9 @@ Index
* control statements: Statements. (line 6)
* controlling array scanning order: Controlling Scanning.
(line 14)
-* convert string to lower case: String Functions. (line 524)
-* convert string to number: String Functions. (line 391)
-* convert string to upper case: String Functions. (line 530)
+* convert string to lower case: String Functions. (line 526)
+* convert string to number: String Functions. (line 393)
+* convert string to upper case: String Functions. (line 532)
* converting integer array subscripts: Numeric Array Subscripts.
(line 31)
* converting, dates to timestamps: Time Functions. (line 78)
@@ -33346,7 +33348,7 @@ Index
(line 149)
* dark corner, regexp constants, as arguments to user-defined functions: Standard Regexp Constants.
(line 43)
-* dark corner, split() function: String Functions. (line 361)
+* dark corner, split() function: String Functions. (line 363)
* dark corner, strings, storing: gawk split records. (line 82)
* dark corner, value of ARGV[0]: Auto-set. (line 39)
* dark corner, ^, in FS: Regexp Field Splitting.
@@ -33563,7 +33565,7 @@ Index
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
- (line 348)
+ (line 350)
* differences in awk and gawk, strings: Scalar Constants. (line 20)
* differences in awk and gawk, strings, storing: gawk split records.
(line 76)
@@ -33896,7 +33898,7 @@ Index
* format time string: Time Functions. (line 50)
* formats, numeric output: OFMT. (line 6)
* formatting output: Printf. (line 6)
-* formatting strings: String Functions. (line 384)
+* formatting strings: String Functions. (line 386)
* forward slash (/) to enclose regular expressions: Regexp. (line 10)
* forward slash (/), / operator: Precedence. (line 54)
* forward slash (/), /= operator: Assignment Ops. (line 129)
@@ -34169,7 +34171,7 @@ Index
* gsub: Standard Regexp Constants.
(line 43)
* gsub <1>: String Functions. (line 139)
-* gsub() function, arguments of: String Functions. (line 463)
+* gsub() function, arguments of: String Functions. (line 465)
* gsub() function, escape processing: Gory Details. (line 6)
* h debugger command (alias for help): Miscellaneous Debugger Commands.
(line 69)
@@ -34458,7 +34460,7 @@ Index
* matching, expressions, See comparison expressions: Typing and Comparison.
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
-* matching, null strings: String Functions. (line 537)
+* matching, null strings: String Functions. (line 539)
* mawk utility: Escape Sequences. (line 121)
* mawk utility <1>: Getline/Pipe. (line 62)
* mawk utility <2>: Concatenation. (line 36)
@@ -34547,7 +34549,7 @@ Index
(line 43)
* null strings, converting numbers to strings: Strings And Numbers.
(line 21)
-* null strings, matching: String Functions. (line 537)
+* null strings, matching: String Functions. (line 539)
* number as string of bits: Bitwise Functions. (line 108)
* number of array elements: String Functions. (line 200)
* number sign (#), #! (executable scripts): Executable Scripts.
@@ -34725,7 +34727,7 @@ Index
* portability, operators: Increment Ops. (line 60)
* portability, operators, not in POSIX awk: Precedence. (line 97)
* portability, POSIXLY_CORRECT environment variable: Options. (line 363)
-* portability, substr() function: String Functions. (line 513)
+* portability, substr() function: String Functions. (line 515)
* portable object files: Explaining gettext. (line 37)
* portable object files <1>: Translator i18n. (line 6)
* portable object files, converting to message object files: I18N Example.
@@ -34976,7 +34978,7 @@ Index
* regular expressions, searching for: Egrep Program. (line 6)
* relational operators, See comparison operators: Typing and Comparison.
(line 9)
-* replace in string: String Functions. (line 409)
+* replace in string: String Functions. (line 411)
* retrying input: Retrying Input. (line 6)
* return debugger command: Debugger Execution Control.
(line 54)
@@ -35144,7 +35146,7 @@ Index
(line 37)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
(line 74)
-* sidebar, Matching the Null String: String Functions. (line 535)
+* sidebar, Matching the Null String: String Functions. (line 537)
* sidebar, Operator Evaluation Order: Increment Ops. (line 58)
* sidebar, Piping into sh: Redirection. (line 134)
* sidebar, Pre-POSIX awk Used OFMT for String Conversion: Strings And Numbers.
@@ -35213,13 +35215,13 @@ Index
* source files, search path for: Programs Exercises. (line 70)
* sparse arrays: Array Intro. (line 76)
* Spencer, Henry: Glossary. (line 16)
-* split: String Functions. (line 315)
+* split: String Functions. (line 317)
* split string into array: String Functions. (line 296)
* split utility: Split Program. (line 6)
* split() function, array elements, deleting: Delete. (line 61)
* split.awk program: Split Program. (line 30)
* sprintf: OFMT. (line 15)
-* sprintf <1>: String Functions. (line 384)
+* sprintf <1>: String Functions. (line 386)
* sprintf() function, OFMT variable and: User-modified. (line 116)
* sprintf() function, print/printf statements and: Round Function.
(line 6)
@@ -35261,10 +35263,10 @@ Index
* string-manipulation functions: String Functions. (line 6)
* string-matching operators: Regexp Usage. (line 19)
* string-translation functions: I18N Functions. (line 6)
-* strings splitting, example: String Functions. (line 334)
+* strings splitting, example: String Functions. (line 336)
* strings, converting: Strings And Numbers. (line 6)
* strings, converting <1>: Bitwise Functions. (line 108)
-* strings, converting letter case: String Functions. (line 523)
+* strings, converting letter case: String Functions. (line 525)
* strings, converting, numbers to: User-modified. (line 30)
* strings, converting, numbers to <1>: User-modified. (line 107)
* strings, empty, See null strings: awk split records. (line 114)
@@ -35275,13 +35277,13 @@ Index
* strings, null: Regexp Field Splitting.
(line 43)
* strings, numeric: Variable Typing. (line 67)
-* strtonum: String Functions. (line 391)
+* strtonum: String Functions. (line 393)
* strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
(line 35)
* sub: Standard Regexp Constants.
(line 43)
-* sub <1>: String Functions. (line 409)
-* sub() function, arguments of: String Functions. (line 463)
+* sub <1>: String Functions. (line 411)
+* sub() function, arguments of: String Functions. (line 465)
* sub() function, escape processing: Gory Details. (line 6)
* subscript separators: User-modified. (line 149)
* subscripts in arrays, multidimensional: Multidimensional. (line 10)
@@ -35295,8 +35297,8 @@ Index
* SUBSEP variable, and multidimensional arrays: Multidimensional.
(line 16)
* substitute in string: String Functions. (line 89)
-* substr: String Functions. (line 482)
-* substring: String Functions. (line 482)
+* substr: String Functions. (line 484)
+* substring: String Functions. (line 484)
* Sumner, Andrew: Other Versions. (line 68)
* supplementary groups of gawk process: Auto-set. (line 252)
* switch statement: Switch Statement. (line 6)
@@ -35356,8 +35358,8 @@ Index
* timestamps, converting dates to: Time Functions. (line 78)
* timestamps, formatted: Getlocaltime Function.
(line 6)
-* tolower: String Functions. (line 524)
-* toupper: String Functions. (line 530)
+* tolower: String Functions. (line 526)
+* toupper: String Functions. (line 532)
* tr utility: Translate Program. (line 6)
* trace debugger command: Miscellaneous Debugger Commands.
(line 110)
@@ -35383,7 +35385,7 @@ Index
* troubleshooting, gawk, fatal errors, function arguments: Calling Built-in.
(line 16)
* troubleshooting, getline function: File Checking. (line 25)
-* troubleshooting, gsub()/sub() functions: String Functions. (line 473)
+* troubleshooting, gsub()/sub() functions: String Functions. (line 475)
* troubleshooting, match() function: String Functions. (line 291)
* troubleshooting, print statement, omitting commas: Print Examples.
(line 30)
@@ -35393,7 +35395,7 @@ Index
* troubleshooting, regexp constants vs. string constants: Computed Regexps.
(line 40)
* troubleshooting, string concatenation: Concatenation. (line 27)
-* troubleshooting, substr() function: String Functions. (line 500)
+* troubleshooting, substr() function: String Functions. (line 502)
* troubleshooting, system() function: I/O Functions. (line 129)
* troubleshooting, typographical errors, global variables: Options.
(line 99)
@@ -35790,336 +35792,336 @@ Ref: Numeric Functions-Footnote-1525480
Ref: Numeric Functions-Footnote-2525837
Ref: Numeric Functions-Footnote-3525885
Node: String Functions526157
-Ref: String Functions-Footnote-1549661
-Ref: String Functions-Footnote-2549789
-Ref: String Functions-Footnote-3550037
-Node: Gory Details550124
-Ref: table-sub-escapes551915
-Ref: table-sub-proposed553434
-Ref: table-posix-sub554797
-Ref: table-gensub-escapes556338
-Ref: Gory Details-Footnote-1557161
-Node: I/O Functions557315
-Ref: table-system-return-values563897
-Ref: I/O Functions-Footnote-1565877
-Ref: I/O Functions-Footnote-2566025
-Node: Time Functions566145
-Ref: Time Functions-Footnote-1576812
-Ref: Time Functions-Footnote-2576880
-Ref: Time Functions-Footnote-3577038
-Ref: Time Functions-Footnote-4577149
-Ref: Time Functions-Footnote-5577261
-Ref: Time Functions-Footnote-6577488
-Node: Bitwise Functions577754
-Ref: table-bitwise-ops578348
-Ref: Bitwise Functions-Footnote-1584381
-Ref: Bitwise Functions-Footnote-2584554
-Node: Type Functions584745
-Node: I18N Functions587420
-Node: User-defined589071
-Node: Definition Syntax589876
-Ref: Definition Syntax-Footnote-1595563
-Node: Function Example595634
-Ref: Function Example-Footnote-1598556
-Node: Function Caveats598578
-Node: Calling A Function599096
-Node: Variable Scope600054
-Node: Pass By Value/Reference603048
-Node: Return Statement606547
-Node: Dynamic Typing609526
-Node: Indirect Calls610456
-Ref: Indirect Calls-Footnote-1620707
-Node: Functions Summary620835
-Node: Library Functions623540
-Ref: Library Functions-Footnote-1627147
-Ref: Library Functions-Footnote-2627290
-Node: Library Names627461
-Ref: Library Names-Footnote-1630921
-Ref: Library Names-Footnote-2631144
-Node: General Functions631230
-Node: Strtonum Function632333
-Node: Assert Function635355
-Node: Round Function638681
-Node: Cliff Random Function640222
-Node: Ordinal Functions641238
-Ref: Ordinal Functions-Footnote-1644301
-Ref: Ordinal Functions-Footnote-2644553
-Node: Join Function644763
-Ref: Join Function-Footnote-1646533
-Node: Getlocaltime Function646733
-Node: Readfile Function650475
-Node: Shell Quoting652447
-Node: Data File Management653848
-Node: Filetrans Function654480
-Node: Rewind Function658576
-Node: File Checking660482
-Ref: File Checking-Footnote-1661816
-Node: Empty Files662017
-Node: Ignoring Assigns663996
-Node: Getopt Function665546
-Ref: Getopt Function-Footnote-1677015
-Node: Passwd Functions677215
-Ref: Passwd Functions-Footnote-1686054
-Node: Group Functions686142
-Ref: Group Functions-Footnote-1694040
-Node: Walking Arrays694247
-Node: Library Functions Summary697255
-Node: Library Exercises698661
-Node: Sample Programs699126
-Node: Running Examples699896
-Node: Clones700624
-Node: Cut Program701848
-Node: Egrep Program711777
-Ref: Egrep Program-Footnote-1719289
-Node: Id Program719399
-Node: Split Program723079
-Ref: Split Program-Footnote-1726538
-Node: Tee Program726667
-Node: Uniq Program729457
-Node: Wc Program736883
-Ref: Wc Program-Footnote-1741138
-Node: Miscellaneous Programs741232
-Node: Dupword Program742445
-Node: Alarm Program744475
-Node: Translate Program749330
-Ref: Translate Program-Footnote-1753895
-Node: Labels Program754165
-Ref: Labels Program-Footnote-1757516
-Node: Word Sorting757600
-Node: History Sorting761672
-Node: Extract Program763507
-Node: Simple Sed771036
-Node: Igawk Program774110
-Ref: Igawk Program-Footnote-1788441
-Ref: Igawk Program-Footnote-2788643
-Ref: Igawk Program-Footnote-3788765
-Node: Anagram Program788880
-Node: Signature Program791942
-Node: Programs Summary793189
-Node: Programs Exercises794403
-Ref: Programs Exercises-Footnote-1798532
-Node: Advanced Features798623
-Node: Nondecimal Data800613
-Node: Array Sorting802204
-Node: Controlling Array Traversal802904
-Ref: Controlling Array Traversal-Footnote-1811271
-Node: Array Sorting Functions811389
-Ref: Array Sorting Functions-Footnote-1816480
-Node: Two-way I/O816676
-Ref: Two-way I/O-Footnote-1823227
-Ref: Two-way I/O-Footnote-2823414
-Node: TCP/IP Networking823496
-Node: Profiling826614
-Ref: Profiling-Footnote-1835286
-Node: Advanced Features Summary835609
-Node: Internationalization837453
-Node: I18N and L10N838933
-Node: Explaining gettext839620
-Ref: Explaining gettext-Footnote-1845512
-Ref: Explaining gettext-Footnote-2845697
-Node: Programmer i18n845862
-Ref: Programmer i18n-Footnote-1850811
-Node: Translator i18n850860
-Node: String Extraction851654
-Ref: String Extraction-Footnote-1852786
-Node: Printf Ordering852872
-Ref: Printf Ordering-Footnote-1855658
-Node: I18N Portability855722
-Ref: I18N Portability-Footnote-1858178
-Node: I18N Example858241
-Ref: I18N Example-Footnote-1861047
-Node: Gawk I18N861120
-Node: I18N Summary861765
-Node: Debugger863106
-Node: Debugging864128
-Node: Debugging Concepts864569
-Node: Debugging Terms866378
-Node: Awk Debugging868953
-Node: Sample Debugging Session869859
-Node: Debugger Invocation870393
-Node: Finding The Bug871779
-Node: List of Debugger Commands878257
-Node: Breakpoint Control879590
-Node: Debugger Execution Control883284
-Node: Viewing And Changing Data886646
-Node: Execution Stack890020
-Node: Debugger Info891657
-Node: Miscellaneous Debugger Commands895728
-Node: Readline Support900816
-Node: Limitations901712
-Node: Debugging Summary903821
-Node: Arbitrary Precision Arithmetic905100
-Node: Computer Arithmetic906516
-Ref: table-numeric-ranges910107
-Ref: Computer Arithmetic-Footnote-1910829
-Node: Math Definitions910886
-Ref: table-ieee-formats914200
-Ref: Math Definitions-Footnote-1914803
-Node: MPFR features914908
-Node: FP Math Caution916625
-Ref: FP Math Caution-Footnote-1917697
-Node: Inexactness of computations918066
-Node: Inexact representation919026
-Node: Comparing FP Values920386
-Node: Errors accumulate921468
-Node: Getting Accuracy922901
-Node: Try To Round925611
-Node: Setting precision926510
-Ref: table-predefined-precision-strings927207
-Node: Setting the rounding mode929037
-Ref: table-gawk-rounding-modes929411
-Ref: Setting the rounding mode-Footnote-1932819
-Node: Arbitrary Precision Integers932998
-Ref: Arbitrary Precision Integers-Footnote-1937915
-Node: POSIX Floating Point Problems938064
-Ref: POSIX Floating Point Problems-Footnote-1941946
-Node: Floating point summary941984
-Node: Dynamic Extensions944174
-Node: Extension Intro945727
-Node: Plugin License946993
-Node: Extension Mechanism Outline947790
-Ref: figure-load-extension948229
-Ref: figure-register-new-function949794
-Ref: figure-call-new-function950886
-Node: Extension API Description952948
-Node: Extension API Functions Introduction954590
-Node: General Data Types959924
-Ref: General Data Types-Footnote-1967129
-Node: Memory Allocation Functions967428
-Ref: Memory Allocation Functions-Footnote-1970273
-Node: Constructor Functions970372
-Node: Registration Functions973371
-Node: Extension Functions974056
-Node: Exit Callback Functions979269
-Node: Extension Version String980519
-Node: Input Parsers981182
-Node: Output Wrappers993889
-Node: Two-way processors998401
-Node: Printing Messages1000666
-Ref: Printing Messages-Footnote-11001837
-Node: Updating ERRNO1001990
-Node: Requesting Values1002729
-Ref: table-value-types-returned1003466
-Node: Accessing Parameters1004402
-Node: Symbol Table Access1005637
-Node: Symbol table by name1006149
-Node: Symbol table by cookie1007938
-Ref: Symbol table by cookie-Footnote-11012123
-Node: Cached values1012187
-Ref: Cached values-Footnote-11015723
-Node: Array Manipulation1015814
-Ref: Array Manipulation-Footnote-11016905
-Node: Array Data Types1016942
-Ref: Array Data Types-Footnote-11019600
-Node: Array Functions1019692
-Node: Flattening Arrays1024091
-Node: Creating Arrays1031032
-Node: Redirection API1035801
-Node: Extension API Variables1038643
-Node: Extension Versioning1039276
-Ref: gawk-api-version1039713
-Node: Extension API Informational Variables1041441
-Node: Extension API Boilerplate1042505
-Node: Changes from API V11046367
-Node: Finding Extensions1047027
-Node: Extension Example1047586
-Node: Internal File Description1048384
-Node: Internal File Ops1052464
-Ref: Internal File Ops-Footnote-11063864
-Node: Using Internal File Ops1064004
-Ref: Using Internal File Ops-Footnote-11066387
-Node: Extension Samples1066661
-Node: Extension Sample File Functions1068190
-Node: Extension Sample Fnmatch1075839
-Node: Extension Sample Fork1077326
-Node: Extension Sample Inplace1078544
-Node: Extension Sample Ord1081754
-Node: Extension Sample Readdir1082590
-Ref: table-readdir-file-types1083479
-Node: Extension Sample Revout1084284
-Node: Extension Sample Rev2way1084873
-Node: Extension Sample Read write array1085613
-Node: Extension Sample Readfile1087555
-Node: Extension Sample Time1088650
-Node: Extension Sample API Tests1089998
-Node: gawkextlib1090490
-Node: Extension summary1092937
-Node: Extension Exercises1096639
-Node: Language History1098137
-Node: V7/SVR3.11099793
-Node: SVR41101945
-Node: POSIX1103379
-Node: BTL1104758
-Node: POSIX/GNU1105487
-Node: Feature History1111379
-Node: Common Extensions1125749
-Node: Ranges and Locales1127032
-Ref: Ranges and Locales-Footnote-11131648
-Ref: Ranges and Locales-Footnote-21131675
-Ref: Ranges and Locales-Footnote-31131910
-Node: Contributors1132131
-Node: History summary1137691
-Node: Installation1139071
-Node: Gawk Distribution1140015
-Node: Getting1140499
-Node: Extracting1141460
-Node: Distribution contents1143098
-Node: Unix Installation1149440
-Node: Quick Installation1150122
-Node: Shell Startup Files1152536
-Node: Additional Configuration Options1153625
-Node: Configuration Philosophy1155430
-Node: Non-Unix Installation1157799
-Node: PC Installation1158259
-Node: PC Binary Installation1159097
-Node: PC Compiling1159532
-Node: PC Using1160649
-Node: Cygwin1163694
-Node: MSYS1164464
-Node: VMS Installation1164965
-Node: VMS Compilation1165756
-Ref: VMS Compilation-Footnote-11166985
-Node: VMS Dynamic Extensions1167043
-Node: VMS Installation Details1168728
-Node: VMS Running1170981
-Node: VMS GNV1175260
-Node: VMS Old Gawk1175995
-Node: Bugs1176466
-Node: Bug address1177129
-Node: Usenet1179526
-Node: Maintainers1180303
-Node: Other Versions1181679
-Node: Installation summary1188263
-Node: Notes1189298
-Node: Compatibility Mode1190163
-Node: Additions1190945
-Node: Accessing The Source1191870
-Node: Adding Code1193305
-Node: New Ports1199523
-Node: Derived Files1204011
-Ref: Derived Files-Footnote-11209496
-Ref: Derived Files-Footnote-21209531
-Ref: Derived Files-Footnote-31210129
-Node: Future Extensions1210243
-Node: Implementation Limitations1210901
-Node: Extension Design1212084
-Node: Old Extension Problems1213238
-Ref: Old Extension Problems-Footnote-11214756
-Node: Extension New Mechanism Goals1214813
-Ref: Extension New Mechanism Goals-Footnote-11218177
-Node: Extension Other Design Decisions1218366
-Node: Extension Future Growth1220479
-Node: Old Extension Mechanism1221315
-Node: Notes summary1223078
-Node: Basic Concepts1224260
-Node: Basic High Level1224941
-Ref: figure-general-flow1225223
-Ref: figure-process-flow1225908
-Ref: Basic High Level-Footnote-11229209
-Node: Basic Data Typing1229394
-Node: Glossary1232722
-Node: Copying1264669
-Node: GNU Free Documentation License1302208
-Node: Index1327326
+Ref: String Functions-Footnote-1549815
+Ref: String Functions-Footnote-2549943
+Ref: String Functions-Footnote-3550191
+Node: Gory Details550278
+Ref: table-sub-escapes552069
+Ref: table-sub-proposed553588
+Ref: table-posix-sub554951
+Ref: table-gensub-escapes556492
+Ref: Gory Details-Footnote-1557315
+Node: I/O Functions557469
+Ref: table-system-return-values564051
+Ref: I/O Functions-Footnote-1566031
+Ref: I/O Functions-Footnote-2566179
+Node: Time Functions566299
+Ref: Time Functions-Footnote-1576966
+Ref: Time Functions-Footnote-2577034
+Ref: Time Functions-Footnote-3577192
+Ref: Time Functions-Footnote-4577303
+Ref: Time Functions-Footnote-5577415
+Ref: Time Functions-Footnote-6577642
+Node: Bitwise Functions577908
+Ref: table-bitwise-ops578502
+Ref: Bitwise Functions-Footnote-1584535
+Ref: Bitwise Functions-Footnote-2584708
+Node: Type Functions584899
+Node: I18N Functions587574
+Node: User-defined589225
+Node: Definition Syntax590030
+Ref: Definition Syntax-Footnote-1595717
+Node: Function Example595788
+Ref: Function Example-Footnote-1598710
+Node: Function Caveats598732
+Node: Calling A Function599250
+Node: Variable Scope600208
+Node: Pass By Value/Reference603202
+Node: Return Statement606701
+Node: Dynamic Typing609680
+Node: Indirect Calls610610
+Ref: Indirect Calls-Footnote-1620861
+Node: Functions Summary620989
+Node: Library Functions623694
+Ref: Library Functions-Footnote-1627301
+Ref: Library Functions-Footnote-2627444
+Node: Library Names627615
+Ref: Library Names-Footnote-1631075
+Ref: Library Names-Footnote-2631298
+Node: General Functions631384
+Node: Strtonum Function632487
+Node: Assert Function635509
+Node: Round Function638835
+Node: Cliff Random Function640376
+Node: Ordinal Functions641392
+Ref: Ordinal Functions-Footnote-1644455
+Ref: Ordinal Functions-Footnote-2644707
+Node: Join Function644917
+Ref: Join Function-Footnote-1646687
+Node: Getlocaltime Function646887
+Node: Readfile Function650629
+Node: Shell Quoting652601
+Node: Data File Management654002
+Node: Filetrans Function654634
+Node: Rewind Function658730
+Node: File Checking660636
+Ref: File Checking-Footnote-1661970
+Node: Empty Files662171
+Node: Ignoring Assigns664150
+Node: Getopt Function665700
+Ref: Getopt Function-Footnote-1677169
+Node: Passwd Functions677369
+Ref: Passwd Functions-Footnote-1686208
+Node: Group Functions686296
+Ref: Group Functions-Footnote-1694194
+Node: Walking Arrays694401
+Node: Library Functions Summary697409
+Node: Library Exercises698815
+Node: Sample Programs699280
+Node: Running Examples700050
+Node: Clones700778
+Node: Cut Program702002
+Node: Egrep Program711931
+Ref: Egrep Program-Footnote-1719443
+Node: Id Program719553
+Node: Split Program723233
+Ref: Split Program-Footnote-1726692
+Node: Tee Program726821
+Node: Uniq Program729611
+Node: Wc Program737037
+Ref: Wc Program-Footnote-1741292
+Node: Miscellaneous Programs741386
+Node: Dupword Program742599
+Node: Alarm Program744629
+Node: Translate Program749484
+Ref: Translate Program-Footnote-1754049
+Node: Labels Program754319
+Ref: Labels Program-Footnote-1757670
+Node: Word Sorting757754
+Node: History Sorting761826
+Node: Extract Program763661
+Node: Simple Sed771190
+Node: Igawk Program774264
+Ref: Igawk Program-Footnote-1788595
+Ref: Igawk Program-Footnote-2788797
+Ref: Igawk Program-Footnote-3788919
+Node: Anagram Program789034
+Node: Signature Program792096
+Node: Programs Summary793343
+Node: Programs Exercises794557
+Ref: Programs Exercises-Footnote-1798686
+Node: Advanced Features798777
+Node: Nondecimal Data800767
+Node: Array Sorting802358
+Node: Controlling Array Traversal803058
+Ref: Controlling Array Traversal-Footnote-1811425
+Node: Array Sorting Functions811543
+Ref: Array Sorting Functions-Footnote-1816634
+Node: Two-way I/O816830
+Ref: Two-way I/O-Footnote-1823381
+Ref: Two-way I/O-Footnote-2823568
+Node: TCP/IP Networking823650
+Node: Profiling826768
+Ref: Profiling-Footnote-1835440
+Node: Advanced Features Summary835763
+Node: Internationalization837607
+Node: I18N and L10N839087
+Node: Explaining gettext839774
+Ref: Explaining gettext-Footnote-1845666
+Ref: Explaining gettext-Footnote-2845851
+Node: Programmer i18n846016
+Ref: Programmer i18n-Footnote-1850965
+Node: Translator i18n851014
+Node: String Extraction851808
+Ref: String Extraction-Footnote-1852940
+Node: Printf Ordering853026
+Ref: Printf Ordering-Footnote-1855812
+Node: I18N Portability855876
+Ref: I18N Portability-Footnote-1858332
+Node: I18N Example858395
+Ref: I18N Example-Footnote-1861201
+Node: Gawk I18N861274
+Node: I18N Summary861919
+Node: Debugger863260
+Node: Debugging864282
+Node: Debugging Concepts864723
+Node: Debugging Terms866532
+Node: Awk Debugging869107
+Node: Sample Debugging Session870013
+Node: Debugger Invocation870547
+Node: Finding The Bug871933
+Node: List of Debugger Commands878411
+Node: Breakpoint Control879744
+Node: Debugger Execution Control883438
+Node: Viewing And Changing Data886800
+Node: Execution Stack890174
+Node: Debugger Info891811
+Node: Miscellaneous Debugger Commands895882
+Node: Readline Support900970
+Node: Limitations901866
+Node: Debugging Summary903975
+Node: Arbitrary Precision Arithmetic905254
+Node: Computer Arithmetic906670
+Ref: table-numeric-ranges910261
+Ref: Computer Arithmetic-Footnote-1910983
+Node: Math Definitions911040
+Ref: table-ieee-formats914354
+Ref: Math Definitions-Footnote-1914957
+Node: MPFR features915062
+Node: FP Math Caution916779
+Ref: FP Math Caution-Footnote-1917851
+Node: Inexactness of computations918220
+Node: Inexact representation919180
+Node: Comparing FP Values920540
+Node: Errors accumulate921622
+Node: Getting Accuracy923055
+Node: Try To Round925765
+Node: Setting precision926664
+Ref: table-predefined-precision-strings927361
+Node: Setting the rounding mode929191
+Ref: table-gawk-rounding-modes929565
+Ref: Setting the rounding mode-Footnote-1932973
+Node: Arbitrary Precision Integers933152
+Ref: Arbitrary Precision Integers-Footnote-1938069
+Node: POSIX Floating Point Problems938218
+Ref: POSIX Floating Point Problems-Footnote-1942100
+Node: Floating point summary942138
+Node: Dynamic Extensions944328
+Node: Extension Intro945881
+Node: Plugin License947147
+Node: Extension Mechanism Outline947944
+Ref: figure-load-extension948383
+Ref: figure-register-new-function949948
+Ref: figure-call-new-function951040
+Node: Extension API Description953102
+Node: Extension API Functions Introduction954744
+Node: General Data Types960078
+Ref: General Data Types-Footnote-1967283
+Node: Memory Allocation Functions967582
+Ref: Memory Allocation Functions-Footnote-1970427
+Node: Constructor Functions970526
+Node: Registration Functions973525
+Node: Extension Functions974210
+Node: Exit Callback Functions979423
+Node: Extension Version String980673
+Node: Input Parsers981336
+Node: Output Wrappers994043
+Node: Two-way processors998555
+Node: Printing Messages1000820
+Ref: Printing Messages-Footnote-11001991
+Node: Updating ERRNO1002144
+Node: Requesting Values1002883
+Ref: table-value-types-returned1003620
+Node: Accessing Parameters1004556
+Node: Symbol Table Access1005791
+Node: Symbol table by name1006303
+Node: Symbol table by cookie1008092
+Ref: Symbol table by cookie-Footnote-11012277
+Node: Cached values1012341
+Ref: Cached values-Footnote-11015877
+Node: Array Manipulation1015968
+Ref: Array Manipulation-Footnote-11017059
+Node: Array Data Types1017096
+Ref: Array Data Types-Footnote-11019754
+Node: Array Functions1019846
+Node: Flattening Arrays1024245
+Node: Creating Arrays1031186
+Node: Redirection API1035955
+Node: Extension API Variables1038797
+Node: Extension Versioning1039430
+Ref: gawk-api-version1039867
+Node: Extension API Informational Variables1041595
+Node: Extension API Boilerplate1042659
+Node: Changes from API V11046521
+Node: Finding Extensions1047181
+Node: Extension Example1047740
+Node: Internal File Description1048538
+Node: Internal File Ops1052618
+Ref: Internal File Ops-Footnote-11064018
+Node: Using Internal File Ops1064158
+Ref: Using Internal File Ops-Footnote-11066541
+Node: Extension Samples1066815
+Node: Extension Sample File Functions1068344
+Node: Extension Sample Fnmatch1075993
+Node: Extension Sample Fork1077480
+Node: Extension Sample Inplace1078698
+Node: Extension Sample Ord1081908
+Node: Extension Sample Readdir1082744
+Ref: table-readdir-file-types1083633
+Node: Extension Sample Revout1084438
+Node: Extension Sample Rev2way1085027
+Node: Extension Sample Read write array1085767
+Node: Extension Sample Readfile1087709
+Node: Extension Sample Time1088804
+Node: Extension Sample API Tests1090152
+Node: gawkextlib1090644
+Node: Extension summary1093091
+Node: Extension Exercises1096793
+Node: Language History1098291
+Node: V7/SVR3.11099947
+Node: SVR41102099
+Node: POSIX1103533
+Node: BTL1104912
+Node: POSIX/GNU1105641
+Node: Feature History1111533
+Node: Common Extensions1125903
+Node: Ranges and Locales1127186
+Ref: Ranges and Locales-Footnote-11131802
+Ref: Ranges and Locales-Footnote-21131829
+Ref: Ranges and Locales-Footnote-31132064
+Node: Contributors1132285
+Node: History summary1137845
+Node: Installation1139225
+Node: Gawk Distribution1140169
+Node: Getting1140653
+Node: Extracting1141614
+Node: Distribution contents1143252
+Node: Unix Installation1149594
+Node: Quick Installation1150276
+Node: Shell Startup Files1152690
+Node: Additional Configuration Options1153779
+Node: Configuration Philosophy1155584
+Node: Non-Unix Installation1157953
+Node: PC Installation1158413
+Node: PC Binary Installation1159251
+Node: PC Compiling1159686
+Node: PC Using1160803
+Node: Cygwin1163848
+Node: MSYS1164618
+Node: VMS Installation1165119
+Node: VMS Compilation1165910
+Ref: VMS Compilation-Footnote-11167139
+Node: VMS Dynamic Extensions1167197
+Node: VMS Installation Details1168882
+Node: VMS Running1171135
+Node: VMS GNV1175414
+Node: VMS Old Gawk1176149
+Node: Bugs1176620
+Node: Bug address1177283
+Node: Usenet1179680
+Node: Maintainers1180457
+Node: Other Versions1181833
+Node: Installation summary1188417
+Node: Notes1189452
+Node: Compatibility Mode1190317
+Node: Additions1191099
+Node: Accessing The Source1192024
+Node: Adding Code1193459
+Node: New Ports1199677
+Node: Derived Files1204165
+Ref: Derived Files-Footnote-11209650
+Ref: Derived Files-Footnote-21209685
+Ref: Derived Files-Footnote-31210283
+Node: Future Extensions1210397
+Node: Implementation Limitations1211055
+Node: Extension Design1212238
+Node: Old Extension Problems1213392
+Ref: Old Extension Problems-Footnote-11214910
+Node: Extension New Mechanism Goals1214967
+Ref: Extension New Mechanism Goals-Footnote-11218331
+Node: Extension Other Design Decisions1218520
+Node: Extension Future Growth1220633
+Node: Old Extension Mechanism1221469
+Node: Notes summary1223232
+Node: Basic Concepts1224414
+Node: Basic High Level1225095
+Ref: figure-general-flow1225377
+Ref: figure-process-flow1226062
+Ref: Basic High Level-Footnote-11229363
+Node: Basic Data Typing1229548
+Node: Glossary1232876
+Node: Copying1264823
+Node: GNU Free Documentation License1302362
+Node: Index1327480

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 0e376104..8b872e9d 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -17994,7 +17994,7 @@ using a third argument is a fatal error.
@cindexgawkfunc{patsplit}
@cindex split string into array
Divide
-@var{string} into pieces defined by @var{fieldpat}
+@var{string} into pieces (or ``fields'') defined by @var{fieldpat}
and store the pieces in @var{array} and the separator strings in the
@var{seps} array. The first piece is stored in
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
@@ -18005,9 +18005,11 @@ It may be either a regexp constant or a string.
If @var{fieldpat} is omitted, the value of @code{FPAT} is used.
@code{patsplit()} returns the number of elements created.
@code{@var{seps}[@var{i}]} is
-the separator string
-between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}.
-Any leading separator will be in @code{@var{seps}[0]}.
+the possibly null separator string
+after @code{@var{array}[@var{i}]}.
+The possibly null leading separator will be in @code{@var{seps}[0]}.
+So a non-null @var{string} with @var{n} fields will have @var{n+1} separators.
+A null @var{string} will have neither fields nor separators.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index f4fe2596..f991432c 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -17267,7 +17267,7 @@ using a third argument is a fatal error.
@cindexgawkfunc{patsplit}
@cindex split string into array
Divide
-@var{string} into pieces defined by @var{fieldpat}
+@var{string} into pieces (or ``fields'') defined by @var{fieldpat}
and store the pieces in @var{array} and the separator strings in the
@var{seps} array. The first piece is stored in
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
@@ -17278,9 +17278,11 @@ It may be either a regexp constant or a string.
If @var{fieldpat} is omitted, the value of @code{FPAT} is used.
@code{patsplit()} returns the number of elements created.
@code{@var{seps}[@var{i}]} is
-the separator string
-between @code{@var{array}[@var{i}]} and @code{@var{array}[@var{i}+1]}.
-Any leading separator will be in @code{@var{seps}[0]}.
+the possibly null separator string
+after @code{@var{array}[@var{i}]}.
+The possibly null leading separator will be in @code{@var{seps}[0]}.
+So a non-null @var{string} with @var{n} fields will have @var{n+1} separators.
+A null @var{string} will have neither fields nor separators.
The @code{patsplit()} function splits strings into pieces in a
manner similar to the way input lines are split into fields using @code{FPAT}
diff --git a/field.c b/field.c
index a3be9773..8145141c 100644
--- a/field.c
+++ b/field.c
@@ -1502,101 +1502,65 @@ incr_scan(char **scanp, size_t len, mbstate_t *mbs)
* via (*parse_field)(). This variation is for when FPAT is a regular
* expression -- use the value to find field contents.
*
- * This was really hard to get right. It happens to bear many resemblances
- * to issues I had with getting gsub right with null matches. When dealing
- * with that I prototyped in awk and had the foresight to save the awk code
- * over in the C file. Starting with that as a base, I finally got to this
- * awk code to do what I needed, and then translated it into C. Fortunately
- * the C code bears a closer correspondance to the awk code here than over
- * by gsub.
+ * The FPAT parsing logic is a bit difficult to specify, in particular
+ * when it comes to allowing null fields at certain locations. To make the code as robust
+ * as possible, an awk reference implementation was written and tested
+ * as a first step, and later recoded in C, preserving its structure as
+ * much as possible.
*
- * BEGIN {
- * false = 0
- * true = 1
- *
- * fpat[1] = "([^,]*)|(\"[^\"]+\")"
- * fpat[2] = fpat[1]
- * fpat[3] = fpat[1]
- * fpat[4] = "aa+"
- * fpat[5] = fpat[4]
- *
- * data[1] = "Robbins,,Arnold,"
- * data[2] = "Smith,,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
- * data[3] = "Robbins,Arnold,\"1234 A Pretty Place, NE\",Sometown,NY,12345-6789,USA"
- * data[4] = "bbbaaacccdddaaaaaqqqq"
- * data[5] = "bbbaaacccdddaaaaaqqqqa" # should get trailing qqqa
- *
- * for (i = 1; i in data; i++) {
- * printf("Splitting: <%s>\n", data[i])
- * n = mypatsplit(data[i], fields, fpat[i], seps)
- * print "n =", n
- * for (j = 1; j <= n; j++)
- * printf("fields[%d] = <%s>\n", j, fields[j])
- * for (j = 0; j in seps; j++)
- * printf("seps[%s] = <%s>\n", j, seps[j])
- * }
- * }
- *
- * function mypatsplit(string, array, pattern, seps,
- * eosflag, non_empty, nf) # locals
+ * # Reference implementation of the FPAT record parsing.
+ * #
+ * # Each loop iteration identifies a (separator[n-1],field[n]) pair.
+ * # Each loop iteration must consume some characters, except for the first field.
+ * # So a null field is only valid as a first field or after a non-null separator.
+ * # A null record has no fields (not a single null field).
+ *
+ * function refpatsplit(string, fields, pattern, seps,
+ * parse_start, sep_start, field_start, field_length, field_found, nf) # locals
* {
- * delete array
- * delete seps
- * if (length(string) == 0)
- * return 0
- *
- * eosflag = non_empty = false
- * nf = 0
- * while (match(string, pattern)) {
- * if (RLENGTH > 0) { # easy case
- * non_empty = true
- * if (! (nf in seps)) {
- * if (RSTART == 1) # match at front of string
- * seps[nf] = ""
- * else
- * seps[nf] = substr(string, 1, RSTART - 1)
- * }
- * array[++nf] = substr(string, RSTART, RLENGTH)
- * string = substr(string, RSTART+RLENGTH)
- * if (length(string) == 0)
- * break
- * } else if (non_empty) {
- * # last match was non-empty, and at the
- * # current character we get a zero length match,
- * # which we don't want, so skip over it
- * non_empty = false
- * seps[nf] = substr(string, 1, 1)
- * string = substr(string, 2)
- * } else {
- * # 0 length match
- * if (! (nf in seps)) {
- * if (RSTART == 1)
- * seps[nf] = ""
- * else
- * seps[nf] = substr(string, 1, RSTART - 1)
- * }
- * array[++nf] = ""
- * if (! non_empty && ! eosflag) { # prev was empty
- * seps[nf] = substr(string, 1, 1)
- * }
- * if (RSTART == 1) {
- * string = substr(string, 2)
- * } else {
- * string = substr(string, RSTART + 1)
- * }
- * non_empty = false
- * }
- * if (length(string) == 0) {
- * if (eosflag)
- * break
- * else
- * eosflag = true
- * }
- * }
- * if (length(string) > 0)
- * seps[nf] = string
- *
- * return length(array)
+ * # Local state variables:
+ * # - parse_start: pointer to the first not yet consumed character
+ * # - sep_start: pointer to the beginning of the parsed separator
+ * # - field_start: pointer to the beginning of the parsed field
+ * # - field_length: length of the parsed field
+ * # - field_found: flag for successful field match
+ * # - nf: Number of fields found so far
+ *
+ * # Prepare for parsing
+ * parse_start = 1 # first not yet parsed char
+ * nf = 0 # fields found so far
+ * delete fields
+ * delete seps
+ *
+ * # Loop that consumes the whole record
+ * while (parse_start <= length(string)) { # still something to parse
+ *
+ * # first attempt to match the next field
+ * sep_start = parse_start
+ * field_found = match(substr(string, parse_start), pattern)
+ *
+ * # check for an invalid null field and retry one character away
+ * if (nf > 0 && field_found && RSTART==1 && RLENGTH==0) {
+ * parse_start++
+ * field_found = match(substr(string, parse_start), pattern)
+ * }
+ *
+ * # store the (sep[n-1],field[n]) pair
+ * if (field_found) {
+ * field_start = parse_start + RSTART - 1
+ * field_length = RLENGTH
+ * seps[nf] = substr(string, sep_start, field_start-sep_start)
+ * fields[++nf] = substr(string, field_start, field_length)
+ * parse_start = field_start + field_length
+ *
+ * # store the final extra sep after the last field
+ * } else {
+ * seps[nf] = substr(string, sep_start)
+ * parse_start = length(string) + 1
+ * }
+ * }
+ *
+ * return nf
* }
*/
static long
@@ -1615,10 +1579,9 @@ fpat_parse_field(long up_to, /* parse only up to this field number */
char *start;
char *end = scan + len;
int regex_flags = RE_NEED_START;
- bool need_to_set_sep;
- bool non_empty;
- bool eosflag;
mbstate_t mbs;
+ char* field_start;
+ bool field_found;
memset(&mbs, 0, sizeof(mbstate_t));
@@ -1631,90 +1594,48 @@ fpat_parse_field(long up_to, /* parse only up to this field number */
if (rp == NULL) /* use FPAT */
rp = FPAT_regexp;
- if (in_middle) {
- regex_flags |= RE_NO_BOL;
- }
- non_empty = rp->non_empty;
+ while (scan <= end && nf < up_to) { /* still something to parse */
- eosflag = false;
- need_to_set_sep = true;
- start = scan;
- while (research(rp, scan, 0, (end - scan), regex_flags) != -1
- && nf < up_to) {
+ /* first attempt to match the next field */
+ start = scan;
+ field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
+
+ /* check for an invalid null field and retry one character away */
+ if (nf > 0 && field_found && REEND(rp, scan) == 0) { /* invalid null field */
+ increment_scan(& scan, end - scan);
+ field_found = research(rp, scan, 0, (end - scan), regex_flags) != -1;
+ }
- if (REEND(rp, scan) > RESTART(rp, scan)) { /* if (RLENGTH > 0) */
- non_empty = true;
- if (sep_arr != NULL && need_to_set_sep) {
- if (RESTART(rp, scan) == 0) /* match at front */
- set_element(nf, start, 0L, sep_arr);
+ /* store the (sep[n-1],field[n]) pair */
+ if (field_found) {
+ field_start = scan + RESTART(rp, scan);
+ if (sep_arr != NULL) { /* store the separator */
+ if (field_start == start) /* match at front */
+ set_element(nf, start, 0L, sep_arr);
else
- set_element(nf,
+ set_element(nf,
start,
- (long) RESTART(rp, scan),
+ (long) (field_start - start),
sep_arr);
}
/* field is text that matched */
(*set)(++nf,
- scan + RESTART(rp, scan),
+ field_start,
(long)(REEND(rp, scan) - RESTART(rp, scan)),
n);
-
scan += REEND(rp, scan);
- if (scan >= end)
- break;
- need_to_set_sep = true;
- } else if (non_empty) { /* else if non_empty */
+
+ } else {
/*
- * last match was non-empty, and at the
- * current character we get a zero length match,
- * which we don't want, so skip over it
+ * No match, store the final extra separator after
+ * the last field.
*/
- non_empty = false;
- if (sep_arr != NULL) {
- need_to_set_sep = false;
- set_element(nf, start, 1L, sep_arr);
- }
- increment_scan(& scan, end - scan);
- } else {
- /* 0 length match */
- if (sep_arr != NULL && need_to_set_sep) {
- if (RESTART(rp, scan) == 0) /* RSTART == 1 */
- set_element(nf, start, 0L, sep_arr);
- else
- set_element(nf, start,
- (long) RESTART(rp, scan),
- sep_arr);
- }
- need_to_set_sep = true;
- (*set)(++nf, scan, 0L, n);
- if (! non_empty && ! eosflag) { /* prev was empty */
- if (sep_arr != NULL) {
- set_element(nf, start, 1L, sep_arr);
- need_to_set_sep = false;
- }
- }
- if (RESTART(rp, scan) == 0)
- increment_scan(& scan, end - scan);
- else {
- scan += RESTART(rp, scan);
- }
- non_empty = false;
- }
- if (scan >= end) { /* length(string) == 0 */
- if (eosflag)
- break;
- else
- eosflag = true;
+ if (sep_arr != NULL)
+ set_element(nf, start, (long) (end - start), sep_arr);
+ scan = end + 1;
}
-
- start = scan;
- }
- if (scan < end) {
- if (sep_arr != NULL)
- set_element(nf, scan, (long) (end - scan), sep_arr);
}
*buf = scan;
- rp->non_empty = non_empty;
return nf;
}
diff --git a/test/ChangeLog b/test/ChangeLog
index dd6b4cfb..ecac0519 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,10 @@
+2017-04-12 Manuel Collado <m-collado@users.sourceforge.net>
+
+ * Makefile.am (fpat6): New test.
+ * fpat6.awk, fpat6.in, fpat6.ok: New files.
+ Check for the bug reported by Ed Morton in the bug-gawk mailing list.
+ * patsplit.ok: Updated to the new patsplit behavior.
+
2017-04-12 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (memleak): New test.
diff --git a/test/Makefile.am b/test/Makefile.am
index fe9b1dcc..7b1b4946 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -339,6 +339,9 @@ EXTRA_DIST = \
fpat5.awk \
fpat5.in \
fpat5.ok \
+ fpat6.awk \
+ fpat6.in \
+ fpat6.ok \
fpatnull.awk \
fpatnull.in \
fpatnull.ok \
@@ -1227,8 +1230,8 @@ GAWK_EXT_TESTS = \
colonwarn clos1way clos1way2 clos1way3 clos1way4 clos1way5 clos1way6 \
crlf dbugeval dbugeval2 dbugtypedre1 dbugtypedre2 delsub \
devfd devfd1 devfd2 dumpvars errno exit \
- fieldwdth forcenum fpat1 fpat2 fpat3 fpat4 fpat5 fpatnull fsfwfs funlen \
- functab1 functab2 functab3 fwtest fwtest2 fwtest3 fwtest4 \
+ fieldwdth forcenum fpat1 fpat2 fpat3 fpat4 fpat5 fpat6 fpatnull \
+ fsfwfs funlen functab1 functab2 functab3 fwtest fwtest2 fwtest3 fwtest4 \
genpot gensub gensub2 gensub3 getlndir gnuops2 gnuops3 gnureops gsubind \
icasefs icasers id igncdym igncfs ignrcas2 ignrcas4 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
diff --git a/test/Makefile.in b/test/Makefile.in
index e6293e8d..bdfbdc82 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -597,6 +597,9 @@ EXTRA_DIST = \
fpat5.awk \
fpat5.in \
fpat5.ok \
+ fpat6.awk \
+ fpat6.in \
+ fpat6.ok \
fpatnull.awk \
fpatnull.in \
fpatnull.ok \
@@ -1484,8 +1487,8 @@ GAWK_EXT_TESTS = \
colonwarn clos1way clos1way2 clos1way3 clos1way4 clos1way5 clos1way6 \
crlf dbugeval dbugeval2 dbugtypedre1 dbugtypedre2 delsub \
devfd devfd1 devfd2 dumpvars errno exit \
- fieldwdth forcenum fpat1 fpat2 fpat3 fpat4 fpat5 fpatnull fsfwfs funlen \
- functab1 functab2 functab3 fwtest fwtest2 fwtest3 fwtest4 \
+ fieldwdth forcenum fpat1 fpat2 fpat3 fpat4 fpat5 fpat6 fpatnull \
+ fsfwfs funlen functab1 functab2 functab3 fwtest fwtest2 fwtest3 fwtest4 \
genpot gensub gensub2 gensub3 getlndir gnuops2 gnuops3 gnureops gsubind \
icasefs icasers id igncdym igncfs ignrcas2 ignrcas4 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
@@ -3966,6 +3969,11 @@ fpat5:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+fpat6:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
fpatnull:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index 4b765c9f..0c77f98a 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1142,6 +1142,11 @@ fpat5:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+fpat6:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
fpatnull:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/fpat6.awk b/test/fpat6.awk
new file mode 100644
index 00000000..de7824a4
--- /dev/null
+++ b/test/fpat6.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ FPAT = "([^,]*)|(\"[^\"]+\")"
+}
+{
+ print "NF =", NF
+ for (i = 1; i <= NF; i++)
+ printf("$%d = <%s>\n", i, $i)
+}
diff --git a/test/fpat6.in b/test/fpat6.in
new file mode 100644
index 00000000..1924cd97
--- /dev/null
+++ b/test/fpat6.in
@@ -0,0 +1,13 @@
+,,3
+,,3
+,,a,,b,,
+,,a,,b,,
+"a",,"b"
+
+
+""
+""
+xx
+xx
+,
+,
diff --git a/test/fpat6.ok b/test/fpat6.ok
new file mode 100644
index 00000000..f9c393a1
--- /dev/null
+++ b/test/fpat6.ok
@@ -0,0 +1,44 @@
+NF = 3
+$1 = <>
+$2 = <>
+$3 = <3>
+NF = 3
+$1 = <>
+$2 = <>
+$3 = <3>
+NF = 7
+$1 = <>
+$2 = <>
+$3 = <a>
+$4 = <>
+$5 = <b>
+$6 = <>
+$7 = <>
+NF = 7
+$1 = <>
+$2 = <>
+$3 = <a>
+$4 = <>
+$5 = <b>
+$6 = <>
+$7 = <>
+NF = 3
+$1 = <"a">
+$2 = <>
+$3 = <"b">
+NF = 0
+NF = 0
+NF = 1
+$1 = <"">
+NF = 1
+$1 = <"">
+NF = 1
+$1 = <xx>
+NF = 1
+$1 = <xx>
+NF = 2
+$1 = <>
+$2 = <>
+NF = 2
+$1 = <>
+$2 = <>
diff --git a/test/patsplit.ok b/test/patsplit.ok
index cda8319e..02387d86 100644
--- a/test/patsplit.ok
+++ b/test/patsplit.ok
@@ -8,6 +8,7 @@ seps[0] = <>
seps[1] = <,>
seps[2] = <,>
seps[3] = <,>
+seps[4] = <>
Splitting: <Smith,,"1234 A Pretty Place, NE",Sometown,NY,12345-6789,USA>
n = 7
fields[1] = <Smith>
@@ -24,6 +25,7 @@ seps[3] = <,>
seps[4] = <,>
seps[5] = <,>
seps[6] = <,>
+seps[7] = <>
Splitting: <Robbins,Arnold,"1234 A Pretty Place, NE",Sometown,NY,12345-6789,USA>
n = 7
fields[1] = <Robbins>
@@ -40,6 +42,7 @@ seps[3] = <,>
seps[4] = <,>
seps[5] = <,>
seps[6] = <,>
+seps[7] = <>
Splitting: <bbbaaacccdddaaaaaqqqq>
n = 2
fields[1] = <aaa>