summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--NEWS6
-rw-r--r--TODO6
-rw-r--r--configh.in3
-rwxr-xr-xconfigure2
-rw-r--r--configure.ac2
-rw-r--r--doc/gawk.info823
-rw-r--r--doc/gawk.texi31
-rw-r--r--eval.c85
-rw-r--r--missing_d/ChangeLog6
-rw-r--r--missing_d/memcmp.c4
-rw-r--r--missing_d/memcpy.c6
-rw-r--r--missing_d/memmove.c2
-rw-r--r--missing_d/memset.c8
-rw-r--r--missing_d/strchr.c6
-rw-r--r--missing_d/strcoll.c7
-rw-r--r--missing_d/strtod.c4
-rw-r--r--protos.h4
-rw-r--r--replace.c4
19 files changed, 597 insertions, 420 deletions
diff --git a/ChangeLog b/ChangeLog
index c0c9ac3d..d7ff45d8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Thu Nov 25 08:32:31 2010 Arnold D. Robbins <arnold@skeeve.com>
+
+ * eval.c (posix_compare): Do string comparison with strcoll() /
+ wcscoll().
+ (cmp_nodes): Call it if do_posix. This may be a bad idea,
+ but what the heck. Standards compatibility uber alles!
+
Wed Nov 24 20:09:23 2010 Arnold D. Robbins <arnold@skeeve.com>
* ext.c (do_ext): Require definition of `plugin_is_GPL_compatible'
@@ -21,7 +28,6 @@ Sun Nov 21 14:23:58 2010 John Haque <j.eh@mchsi.com>
(assign_common, assign, compare): Nuked macros.
(cmp_scalar, op_assign): New functions as replacements for the macros.
-
Fri Nov 19 11:57:28 2010 Arnold D. Robbins <arnold@skeeve.com>
* bootstrap.sh, Makefile.am: Remove treatment of CVS.
diff --git a/NEWS b/NEWS
index 431192e0..047f676d 100644
--- a/NEWS
+++ b/NEWS
@@ -66,8 +66,12 @@ Changes from 3.1.8 to 4.0.0
22. Per the GNU Coding Standards, dynamic extensions must now define
a global symbol indicating that they are GPL-compatible. See
the documentation and example extensions.
+ THIS CHANGES BEHAVIOR!!!!
-23. Many code cleanups. Removed code for many old, unsupported systems.
+23. In POSIX mode, string comparisons use strcoll/wcscoll.
+ THIS CHANGES BEHAVIOR!!!!
+
+24. Many code cleanups. Removed code for many old, unsupported systems.
Changes from 3.1.7 to 3.1.8
---------------------------
diff --git a/TODO b/TODO
index 4bc0b372..8a0c1be6 100644
--- a/TODO
+++ b/TODO
@@ -22,9 +22,9 @@ xgawk features (@load, -l, others)
#Xfer it to savannah
#Tag last CVS revisions
#Remove treatment of CVS directories from makefiles (awklib, check others)
-# Review POSIX standard
-Fix issues related to POSIX
- - use of STRCOLL for comparison
+#Review POSIX standard
+#Fix issues related to POSIX
+# - use of STRCOLL for comparison
Add tests for pgawk
Add tests for patches in emails
Add doc fix in email
diff --git a/configh.in b/configh.in
index edcef5f0..b3168c0a 100644
--- a/configh.in
+++ b/configh.in
@@ -197,6 +197,9 @@
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
+/* Define to 1 if you have the `strcoll' function. */
+#undef HAVE_STRCOLL
+
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
diff --git a/configure b/configure
index 25e74436..71d9e1fd 100755
--- a/configure
+++ b/configure
@@ -9747,7 +9747,7 @@ for ac_func in atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp setenv setlocale snprintf strchr \
- strerror strftime strncasecmp strtod strtoul \
+ strerror strftime strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wcscoll wctype
do :
diff --git a/configure.ac b/configure.ac
index d1998f63..3e1b231a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -268,7 +268,7 @@ AC_CHECK_FUNCS(atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp setenv setlocale snprintf strchr \
- strerror strftime strncasecmp strtod strtoul \
+ strerror strftime strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wcscoll wctype)
dnl this check is for both mbrtowc and the mbstate_t type, which is good
diff --git a/doc/gawk.info b/doc/gawk.info
index dd1f8a98..70c63046 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -237,6 +237,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
with `<', etc.
* Variable Typing:: String type versus numeric type.
* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.
* Boolean Ops:: Combining comparison expressions using
boolean operators `||' (``or''),
`&&' (``and'') and `!' (``not'').
@@ -6664,6 +6665,7 @@ are typed, and how `awk' compares variables.
* Variable Typing:: String type versus numeric type.
* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.

File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typing and Comparison
@@ -6763,7 +6765,7 @@ otherwise:
gratifying that the POSIX standard is also now correct.

-File: gawk.info, Node: Comparison Operators, Prev: Variable Typing, Up: Typing and Comparison
+File: gawk.info, Node: Comparison Operators, Next: POSIX String Comparison, Prev: Variable Typing, Up: Typing and Comparison
5.3.2.2 Comparison Operators
............................
@@ -6876,6 +6878,33 @@ abbreviation for the following comparison expression:
Constant Regexps::, where this is discussed in more detail.

+File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, Up: Typing and Comparison
+
+5.3.2.3 String comparison with POSIX rules.
+...........................................
+
+The POSIX standard says that string comparison is performed based on
+the locale's collating order. This is usually very different from the
+results obtained when doing straight character-by-character
+comparison.(1)
+
+ Because this behavior differs considerably from existing practice,
+`gawk' only implements it when in POSIX mode (*note Options::). Here
+is an example to illustrate the difference, in a `en_US.UTF-8' locale:
+
+ $ gawk 'BEGIN { printf("ABC < abc = %s\n",
+ > ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
+ -| ABC < abc = TRUE
+ $ gawk --posix 'BEGIN { printf("ABC < abc = %s\n",
+ > ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
+ -| ABC < abc = FALSE
+
+ ---------- Footnotes ----------
+
+ (1) Technically, string comparison is supposed to behave the same
+way as if the strings are compared with the C `strcoll()' function.
+
+
File: gawk.info, Node: Boolean Ops, Next: Conditional Exp, Prev: Typing and Comparison, Up: Truth Values and Conditions
5.3.3 Boolean Expressions
@@ -26826,400 +26855,402 @@ Index

Tag Table:
Node: Top1340
-Node: Foreword29871
-Node: Preface34187
-Ref: Preface-Footnote-137139
-Ref: Preface-Footnote-237245
-Node: History37477
-Node: Names39709
-Ref: Names-Footnote-141186
-Node: This Manual41258
-Ref: This Manual-Footnote-146156
-Node: Conventions46256
-Node: Manual History48315
-Ref: Manual History-Footnote-151493
-Ref: Manual History-Footnote-251534
-Node: How To Contribute51608
-Node: Acknowledgments52752
-Node: Getting Started57021
-Node: Running gawk59393
-Node: One-shot60579
-Node: Read Terminal61804
-Ref: Read Terminal-Footnote-163454
-Ref: Read Terminal-Footnote-263728
-Node: Long63899
-Node: Executable Scripts65275
-Ref: Executable Scripts-Footnote-167136
-Ref: Executable Scripts-Footnote-267238
-Node: Comments67689
-Node: Quoting70057
-Node: DOS Quoting74674
-Node: Sample Data Files75342
-Node: Very Simple78374
-Node: Two Rules82971
-Node: More Complex85118
-Ref: More Complex-Footnote-188048
-Node: Statements/Lines88128
-Ref: Statements/Lines-Footnote-192484
-Node: Other Features92749
-Node: When93618
-Node: Regexp95761
-Node: Regexp Usage97215
-Node: Escape Sequences99241
-Node: Regexp Operators104984
-Ref: Regexp Operators-Footnote-1112156
-Ref: Regexp Operators-Footnote-2112303
-Node: Character Lists112401
-Ref: table-char-classes114176
-Node: GNU Regexp Operators116801
-Node: Case-sensitivity120514
-Ref: Case-sensitivity-Footnote-1123469
-Ref: Case-sensitivity-Footnote-2123704
-Node: Leftmost Longest123812
-Node: Computed Regexps125013
-Node: Locales128430
-Node: Reading Files131520
-Node: Records133536
-Ref: Records-Footnote-1142102
-Node: Fields142139
-Ref: Fields-Footnote-1145171
-Node: Nonconstant Fields145257
-Node: Changing Fields147459
-Node: Field Separators152744
-Node: Default Field Splitting155373
-Node: Regexp Field Splitting156490
-Node: Single Character Fields159840
-Node: Command Line Field Separator160891
-Node: Field Splitting Summary164330
-Ref: Field Splitting Summary-Footnote-1167516
-Node: Constant Size167617
-Node: Splitting By Content172088
-Ref: Splitting By Content-Footnote-1175690
-Node: Multiple Line175730
-Ref: Multiple Line-Footnote-1181470
-Node: Getline181649
-Node: Plain Getline183870
-Node: Getline/Variable185959
-Node: Getline/File187100
-Node: Getline/Variable/File188422
-Ref: Getline/Variable/File-Footnote-1190021
-Node: Getline/Pipe190108
-Node: Getline/Variable/Pipe192656
-Node: Getline/Coprocess193763
-Node: Getline/Variable/Coprocess195006
-Node: Getline Notes195720
-Node: Getline Summary197662
-Ref: table-getline-variants197946
-Node: BEGINFILE/ENDFILE198851
-Node: Command line directories201706
-Node: Printing202341
-Node: Print203972
-Node: Print Examples205309
-Node: Output Separators208093
-Node: OFMT209852
-Node: Printf211210
-Node: Basic Printf212116
-Node: Control Letters213653
-Node: Format Modifiers217465
-Node: Printf Examples223476
-Node: Redirection226191
-Node: Special Files233169
-Node: Special FD233702
-Ref: Special FD-Footnote-1237277
-Node: Special Network237351
-Node: Special Caveats238206
-Node: Close Files And Pipes239000
-Ref: Close Files And Pipes-Footnote-1245944
-Ref: Close Files And Pipes-Footnote-2246092
-Node: Expressions246242
-Node: Values247311
-Node: Constants247987
-Node: Scalar Constants248667
-Ref: Scalar Constants-Footnote-1249526
-Node: Nondecimal-numbers249708
-Node: Regexp Constants252767
-Node: Using Constant Regexps253242
-Node: Variables256247
-Node: Using Variables256902
-Node: Assignment Options258629
-Node: Conversion260510
-Ref: table-locale-affects265884
-Ref: Conversion-Footnote-1266508
-Node: All Operators266617
-Node: Arithmetic Ops267247
-Node: Concatenation269746
-Ref: Concatenation-Footnote-1272539
-Node: Assignment Ops272658
-Ref: table-assign-ops277646
-Node: Increment Ops279047
-Node: Truth Values and Conditions282525
-Node: Truth Values283608
-Node: Typing and Comparison284656
-Node: Variable Typing285377
-Ref: Variable Typing-Footnote-1289274
-Node: Comparison Operators289396
-Ref: table-relational-ops289774
-Node: Boolean Ops293323
-Ref: Boolean Ops-Footnote-1297401
-Node: Conditional Exp297492
-Node: Function Calls299224
-Node: Precedence302783
-Node: Patterns and Actions306436
-Node: Pattern Overview307490
-Node: Regexp Patterns308927
-Node: Expression Patterns309470
-Node: Ranges313020
-Node: BEGIN/END316109
-Node: Using BEGIN/END316859
-Ref: Using BEGIN/END-Footnote-1319590
-Node: I/O And BEGIN/END319704
-Node: Empty321971
-Node: Using Shell Variables322279
-Node: Action Overview324560
-Node: Statements326918
-Node: If Statement328774
-Node: While Statement330273
-Node: Do Statement332305
-Node: For Statement333454
-Node: Switch Statement336594
-Node: Break Statement338642
-Node: Continue Statement340462
-Node: Next Statement342160
-Node: Nextfile Statement344440
-Node: Exit Statement347158
-Node: Built-in Variables349429
-Node: User-modified350524
-Ref: User-modified-Footnote-1358490
-Node: Auto-set358552
-Ref: Auto-set-Footnote-1367214
-Node: ARGC and ARGV367419
-Node: Arrays371180
-Node: Array Basics372689
-Node: Array Intro373400
-Node: Reference to Elements377787
-Node: Assigning Elements379686
-Node: Array Example380177
-Node: Scanning an Array381909
-Node: Delete384186
-Ref: Delete-Footnote-1386576
-Node: Numeric Array Subscripts386633
-Node: Uninitialized Subscripts388820
-Node: Multi-dimensional390426
-Node: Multi-scanning393517
-Node: Array Sorting395101
-Node: Arrays of Arrays398931
-Node: Functions403039
-Node: Built-in403848
-Node: Calling Built-in404862
-Node: Numeric Functions406838
-Ref: Numeric Functions-Footnote-1410592
-Ref: Numeric Functions-Footnote-2410926
-Node: String Functions411195
-Ref: String Functions-Footnote-1433029
-Ref: String Functions-Footnote-2433158
-Ref: String Functions-Footnote-3433406
-Node: Gory Details433493
-Ref: table-sub-escapes435150
-Ref: table-sub-posix-92436496
-Ref: table-sub-proposed437839
-Ref: table-posix-2001-sub439199
-Ref: table-gensub-escapes440474
-Ref: Gory Details-Footnote-1441677
-Node: I/O Functions441728
-Ref: I/O Functions-Footnote-1448516
-Node: Time Functions448607
-Ref: Time Functions-Footnote-1459419
-Ref: Time Functions-Footnote-2459487
-Ref: Time Functions-Footnote-3459645
-Ref: Time Functions-Footnote-4459756
-Ref: Time Functions-Footnote-5459883
-Ref: Time Functions-Footnote-6460110
-Node: Bitwise Functions460376
-Ref: table-bitwise-ops460954
-Ref: Bitwise Functions-Footnote-1465194
-Node: I18N Functions465378
-Node: User-defined467101
-Node: Definition Syntax467905
-Node: Function Example472603
-Node: Function Caveats475185
-Node: Return Statement479110
-Node: Dynamic Typing481767
-Node: Indirect Calls482504
-Node: Internationalization492139
-Node: I18N and L10N493558
-Node: Explaining gettext494242
-Ref: Explaining gettext-Footnote-1499153
-Ref: Explaining gettext-Footnote-2499392
-Node: Programmer i18n499561
-Node: Translator i18n503796
-Node: String Extraction504587
-Ref: String Extraction-Footnote-1505544
-Node: Printf Ordering505670
-Ref: Printf Ordering-Footnote-1508450
-Node: I18N Portability508514
-Ref: I18N Portability-Footnote-1510959
-Node: I18N Example511022
-Ref: I18N Example-Footnote-1513642
-Node: Gawk I18N513714
-Node: Advanced Features514292
-Node: Nondecimal Data515607
-Node: Two-way I/O517168
-Ref: Two-way I/O-Footnote-1522651
-Node: TCP/IP Networking522728
-Node: Profiling525518
-Node: Invoking Gawk532979
-Node: Command Line534286
-Node: Options535071
-Ref: Options-Footnote-1548159
-Node: Other Arguments548184
-Node: AWKPATH Variable550865
-Ref: AWKPATH Variable-Footnote-1553640
-Node: Exit Status553900
-Node: Include Files554572
-Node: Obsolete558173
-Node: Undocumented558974
-Node: Known Bugs559236
-Node: Library Functions559838
-Ref: Library Functions-Footnote-1562819
-Node: Library Names562990
-Ref: Library Names-Footnote-1566463
-Ref: Library Names-Footnote-2566682
-Node: General Functions566768
-Node: Nextfile Function567831
-Node: Strtonum Function572195
-Node: Assert Function575136
-Node: Round Function578440
-Node: Cliff Random Function579980
-Node: Ordinal Functions580995
-Ref: Ordinal Functions-Footnote-1584055
-Node: Join Function584271
-Ref: Join Function-Footnote-1586033
-Node: Gettimeofday Function586233
-Node: Data File Management589944
-Node: Filetrans Function590576
-Node: Rewind Function594002
-Node: File Checking595448
-Node: Empty Files596478
-Node: Ignoring Assigns598703
-Node: Getopt Function600251
-Ref: Getopt Function-Footnote-1611533
-Node: Passwd Functions611736
-Ref: Passwd Functions-Footnote-1620714
-Node: Group Functions620802
-Node: Sample Programs628899
-Node: Running Examples629568
-Node: Clones630296
-Node: Cut Program631428
-Node: Egrep Program641187
-Ref: Egrep Program-Footnote-1648937
-Node: Id Program649047
-Node: Split Program652654
-Node: Tee Program656122
-Node: Uniq Program658865
-Node: Wc Program666232
-Ref: Wc Program-Footnote-1670476
-Node: Miscellaneous Programs670672
-Node: Dupword Program671792
-Node: Alarm Program673823
-Node: Translate Program678365
-Ref: Translate Program-Footnote-1682744
-Ref: Translate Program-Footnote-2682981
-Node: Labels Program683115
-Ref: Labels Program-Footnote-1686406
-Node: Word Sorting686490
-Node: History Sorting690837
-Node: Extract Program692675
-Node: Simple Sed700033
-Node: Igawk Program703090
-Ref: Igawk Program-Footnote-1717821
-Ref: Igawk Program-Footnote-2718022
-Node: Signature Program718160
-Node: Debugger719240
-Node: Debugging720116
-Node: Debugging Concepts720430
-Node: Debugging Terms722283
-Node: Awk Debugging724831
-Node: Sample dgawk session725723
-Node: dgawk invocation726215
-Node: Finding The Bug727399
-Node: List of Debugger Commands733914
-Node: Breakpoint Control735229
-Node: Dgawk Execution Control738439
-Node: Viewing And Changing Data741788
-Node: Dgawk Stack745084
-Node: Dgawk Info746545
-Node: Miscellaneous Dgawk Commands750483
-Node: Readline Support756199
-Node: Dgawk Limitations757015
-Node: Language History759187
-Node: V7/SVR3.1760564
-Node: SVR4762859
-Node: POSIX764304
-Node: BTL766016
-Node: POSIX/GNU767706
-Node: Contributors777370
-Node: Installation780975
-Node: Gawk Distribution781946
-Node: Getting782430
-Node: Extracting783256
-Node: Distribution contents784644
-Node: Unix Installation789717
-Node: Quick Installation790308
-Node: Additional Configuration Options792010
-Node: Configuration Philosophy793773
-Node: Non-Unix Installation796137
-Node: PC Installation796602
-Node: PC Binary Installation797908
-Node: PC Compiling799751
-Node: PC Dynamic804256
-Node: PC Using806619
-Node: Cygwin811167
-Node: MSYS812151
-Node: VMS Installation812657
-Node: VMS Compilation813261
-Node: VMS Installation Details814838
-Node: VMS Running816468
-Node: VMS POSIX818065
-Node: VMS Old Gawk819363
-Node: Unsupported819832
-Node: Atari Installation820294
-Node: Atari Compiling821581
-Node: Atari Using823470
-Node: BeOS Installation826317
-Node: Tandem Installation827462
-Node: Bugs829141
-Node: Other Versions832973
-Node: Notes838195
-Node: Compatibility Mode838887
-Node: Additions839670
-Node: Adding Code840420
-Node: New Ports846472
-Node: Dynamic Extensions850604
-Node: Internals851985
-Node: Plugin License862390
-Node: Sample Library863024
-Node: Internal File Description863688
-Node: Internal File Ops867383
-Ref: Internal File Ops-Footnote-1872259
-Node: Using Internal File Ops872407
-Node: Future Extensions874432
-Node: Basic Concepts878469
-Node: Basic High Level879226
-Ref: Basic High Level-Footnote-1883342
-Node: Basic Data Typing883536
-Node: Floating Point Issues887973
-Node: String Conversion Precision889056
-Ref: String Conversion Precision-Footnote-1890750
-Node: Unexpected Results890859
-Node: POSIX Floating Point Problems892685
-Ref: POSIX Floating Point Problems-Footnote-1896384
-Node: Glossary896422
-Node: Copying920190
-Node: GNU Free Documentation License957747
-Node: next-edition982891
-Node: unresolved983243
-Node: revision983743
-Node: consistency984166
-Node: Index987519
+Node: Foreword29942
+Node: Preface34258
+Ref: Preface-Footnote-137210
+Ref: Preface-Footnote-237316
+Node: History37548
+Node: Names39780
+Ref: Names-Footnote-141257
+Node: This Manual41329
+Ref: This Manual-Footnote-146227
+Node: Conventions46327
+Node: Manual History48386
+Ref: Manual History-Footnote-151564
+Ref: Manual History-Footnote-251605
+Node: How To Contribute51679
+Node: Acknowledgments52823
+Node: Getting Started57092
+Node: Running gawk59464
+Node: One-shot60650
+Node: Read Terminal61875
+Ref: Read Terminal-Footnote-163525
+Ref: Read Terminal-Footnote-263799
+Node: Long63970
+Node: Executable Scripts65346
+Ref: Executable Scripts-Footnote-167207
+Ref: Executable Scripts-Footnote-267309
+Node: Comments67760
+Node: Quoting70128
+Node: DOS Quoting74745
+Node: Sample Data Files75413
+Node: Very Simple78445
+Node: Two Rules83042
+Node: More Complex85189
+Ref: More Complex-Footnote-188119
+Node: Statements/Lines88199
+Ref: Statements/Lines-Footnote-192555
+Node: Other Features92820
+Node: When93689
+Node: Regexp95832
+Node: Regexp Usage97286
+Node: Escape Sequences99312
+Node: Regexp Operators105055
+Ref: Regexp Operators-Footnote-1112227
+Ref: Regexp Operators-Footnote-2112374
+Node: Character Lists112472
+Ref: table-char-classes114247
+Node: GNU Regexp Operators116872
+Node: Case-sensitivity120585
+Ref: Case-sensitivity-Footnote-1123540
+Ref: Case-sensitivity-Footnote-2123775
+Node: Leftmost Longest123883
+Node: Computed Regexps125084
+Node: Locales128501
+Node: Reading Files131591
+Node: Records133607
+Ref: Records-Footnote-1142173
+Node: Fields142210
+Ref: Fields-Footnote-1145242
+Node: Nonconstant Fields145328
+Node: Changing Fields147530
+Node: Field Separators152815
+Node: Default Field Splitting155444
+Node: Regexp Field Splitting156561
+Node: Single Character Fields159911
+Node: Command Line Field Separator160962
+Node: Field Splitting Summary164401
+Ref: Field Splitting Summary-Footnote-1167587
+Node: Constant Size167688
+Node: Splitting By Content172159
+Ref: Splitting By Content-Footnote-1175761
+Node: Multiple Line175801
+Ref: Multiple Line-Footnote-1181541
+Node: Getline181720
+Node: Plain Getline183941
+Node: Getline/Variable186030
+Node: Getline/File187171
+Node: Getline/Variable/File188493
+Ref: Getline/Variable/File-Footnote-1190092
+Node: Getline/Pipe190179
+Node: Getline/Variable/Pipe192727
+Node: Getline/Coprocess193834
+Node: Getline/Variable/Coprocess195077
+Node: Getline Notes195791
+Node: Getline Summary197733
+Ref: table-getline-variants198017
+Node: BEGINFILE/ENDFILE198922
+Node: Command line directories201777
+Node: Printing202412
+Node: Print204043
+Node: Print Examples205380
+Node: Output Separators208164
+Node: OFMT209923
+Node: Printf211281
+Node: Basic Printf212187
+Node: Control Letters213724
+Node: Format Modifiers217536
+Node: Printf Examples223547
+Node: Redirection226262
+Node: Special Files233240
+Node: Special FD233773
+Ref: Special FD-Footnote-1237348
+Node: Special Network237422
+Node: Special Caveats238277
+Node: Close Files And Pipes239071
+Ref: Close Files And Pipes-Footnote-1246015
+Ref: Close Files And Pipes-Footnote-2246163
+Node: Expressions246313
+Node: Values247382
+Node: Constants248058
+Node: Scalar Constants248738
+Ref: Scalar Constants-Footnote-1249597
+Node: Nondecimal-numbers249779
+Node: Regexp Constants252838
+Node: Using Constant Regexps253313
+Node: Variables256318
+Node: Using Variables256973
+Node: Assignment Options258700
+Node: Conversion260581
+Ref: table-locale-affects265955
+Ref: Conversion-Footnote-1266579
+Node: All Operators266688
+Node: Arithmetic Ops267318
+Node: Concatenation269817
+Ref: Concatenation-Footnote-1272610
+Node: Assignment Ops272729
+Ref: table-assign-ops277717
+Node: Increment Ops279118
+Node: Truth Values and Conditions282596
+Node: Truth Values283679
+Node: Typing and Comparison284727
+Node: Variable Typing285516
+Ref: Variable Typing-Footnote-1289413
+Node: Comparison Operators289535
+Ref: table-relational-ops289945
+Node: POSIX String Comparison293494
+Ref: POSIX String Comparison-Footnote-1294451
+Node: Boolean Ops294589
+Ref: Boolean Ops-Footnote-1298667
+Node: Conditional Exp298758
+Node: Function Calls300490
+Node: Precedence304049
+Node: Patterns and Actions307702
+Node: Pattern Overview308756
+Node: Regexp Patterns310193
+Node: Expression Patterns310736
+Node: Ranges314286
+Node: BEGIN/END317375
+Node: Using BEGIN/END318125
+Ref: Using BEGIN/END-Footnote-1320856
+Node: I/O And BEGIN/END320970
+Node: Empty323237
+Node: Using Shell Variables323545
+Node: Action Overview325826
+Node: Statements328184
+Node: If Statement330040
+Node: While Statement331539
+Node: Do Statement333571
+Node: For Statement334720
+Node: Switch Statement337860
+Node: Break Statement339908
+Node: Continue Statement341728
+Node: Next Statement343426
+Node: Nextfile Statement345706
+Node: Exit Statement348424
+Node: Built-in Variables350695
+Node: User-modified351790
+Ref: User-modified-Footnote-1359756
+Node: Auto-set359818
+Ref: Auto-set-Footnote-1368480
+Node: ARGC and ARGV368685
+Node: Arrays372446
+Node: Array Basics373955
+Node: Array Intro374666
+Node: Reference to Elements379053
+Node: Assigning Elements380952
+Node: Array Example381443
+Node: Scanning an Array383175
+Node: Delete385452
+Ref: Delete-Footnote-1387842
+Node: Numeric Array Subscripts387899
+Node: Uninitialized Subscripts390086
+Node: Multi-dimensional391692
+Node: Multi-scanning394783
+Node: Array Sorting396367
+Node: Arrays of Arrays400197
+Node: Functions404305
+Node: Built-in405114
+Node: Calling Built-in406128
+Node: Numeric Functions408104
+Ref: Numeric Functions-Footnote-1411858
+Ref: Numeric Functions-Footnote-2412192
+Node: String Functions412461
+Ref: String Functions-Footnote-1434295
+Ref: String Functions-Footnote-2434424
+Ref: String Functions-Footnote-3434672
+Node: Gory Details434759
+Ref: table-sub-escapes436416
+Ref: table-sub-posix-92437762
+Ref: table-sub-proposed439105
+Ref: table-posix-2001-sub440465
+Ref: table-gensub-escapes441740
+Ref: Gory Details-Footnote-1442943
+Node: I/O Functions442994
+Ref: I/O Functions-Footnote-1449782
+Node: Time Functions449873
+Ref: Time Functions-Footnote-1460685
+Ref: Time Functions-Footnote-2460753
+Ref: Time Functions-Footnote-3460911
+Ref: Time Functions-Footnote-4461022
+Ref: Time Functions-Footnote-5461149
+Ref: Time Functions-Footnote-6461376
+Node: Bitwise Functions461642
+Ref: table-bitwise-ops462220
+Ref: Bitwise Functions-Footnote-1466460
+Node: I18N Functions466644
+Node: User-defined468367
+Node: Definition Syntax469171
+Node: Function Example473869
+Node: Function Caveats476451
+Node: Return Statement480376
+Node: Dynamic Typing483033
+Node: Indirect Calls483770
+Node: Internationalization493405
+Node: I18N and L10N494824
+Node: Explaining gettext495508
+Ref: Explaining gettext-Footnote-1500419
+Ref: Explaining gettext-Footnote-2500658
+Node: Programmer i18n500827
+Node: Translator i18n505062
+Node: String Extraction505853
+Ref: String Extraction-Footnote-1506810
+Node: Printf Ordering506936
+Ref: Printf Ordering-Footnote-1509716
+Node: I18N Portability509780
+Ref: I18N Portability-Footnote-1512225
+Node: I18N Example512288
+Ref: I18N Example-Footnote-1514908
+Node: Gawk I18N514980
+Node: Advanced Features515558
+Node: Nondecimal Data516873
+Node: Two-way I/O518434
+Ref: Two-way I/O-Footnote-1523917
+Node: TCP/IP Networking523994
+Node: Profiling526784
+Node: Invoking Gawk534245
+Node: Command Line535552
+Node: Options536337
+Ref: Options-Footnote-1549425
+Node: Other Arguments549450
+Node: AWKPATH Variable552131
+Ref: AWKPATH Variable-Footnote-1554906
+Node: Exit Status555166
+Node: Include Files555838
+Node: Obsolete559439
+Node: Undocumented560240
+Node: Known Bugs560502
+Node: Library Functions561104
+Ref: Library Functions-Footnote-1564085
+Node: Library Names564256
+Ref: Library Names-Footnote-1567729
+Ref: Library Names-Footnote-2567948
+Node: General Functions568034
+Node: Nextfile Function569097
+Node: Strtonum Function573461
+Node: Assert Function576402
+Node: Round Function579706
+Node: Cliff Random Function581246
+Node: Ordinal Functions582261
+Ref: Ordinal Functions-Footnote-1585321
+Node: Join Function585537
+Ref: Join Function-Footnote-1587299
+Node: Gettimeofday Function587499
+Node: Data File Management591210
+Node: Filetrans Function591842
+Node: Rewind Function595268
+Node: File Checking596714
+Node: Empty Files597744
+Node: Ignoring Assigns599969
+Node: Getopt Function601517
+Ref: Getopt Function-Footnote-1612799
+Node: Passwd Functions613002
+Ref: Passwd Functions-Footnote-1621980
+Node: Group Functions622068
+Node: Sample Programs630165
+Node: Running Examples630834
+Node: Clones631562
+Node: Cut Program632694
+Node: Egrep Program642453
+Ref: Egrep Program-Footnote-1650203
+Node: Id Program650313
+Node: Split Program653920
+Node: Tee Program657388
+Node: Uniq Program660131
+Node: Wc Program667498
+Ref: Wc Program-Footnote-1671742
+Node: Miscellaneous Programs671938
+Node: Dupword Program673058
+Node: Alarm Program675089
+Node: Translate Program679631
+Ref: Translate Program-Footnote-1684010
+Ref: Translate Program-Footnote-2684247
+Node: Labels Program684381
+Ref: Labels Program-Footnote-1687672
+Node: Word Sorting687756
+Node: History Sorting692103
+Node: Extract Program693941
+Node: Simple Sed701299
+Node: Igawk Program704356
+Ref: Igawk Program-Footnote-1719087
+Ref: Igawk Program-Footnote-2719288
+Node: Signature Program719426
+Node: Debugger720506
+Node: Debugging721382
+Node: Debugging Concepts721696
+Node: Debugging Terms723549
+Node: Awk Debugging726097
+Node: Sample dgawk session726989
+Node: dgawk invocation727481
+Node: Finding The Bug728665
+Node: List of Debugger Commands735180
+Node: Breakpoint Control736495
+Node: Dgawk Execution Control739705
+Node: Viewing And Changing Data743054
+Node: Dgawk Stack746350
+Node: Dgawk Info747811
+Node: Miscellaneous Dgawk Commands751749
+Node: Readline Support757465
+Node: Dgawk Limitations758281
+Node: Language History760453
+Node: V7/SVR3.1761830
+Node: SVR4764125
+Node: POSIX765570
+Node: BTL767282
+Node: POSIX/GNU768972
+Node: Contributors778636
+Node: Installation782241
+Node: Gawk Distribution783212
+Node: Getting783696
+Node: Extracting784522
+Node: Distribution contents785910
+Node: Unix Installation790983
+Node: Quick Installation791574
+Node: Additional Configuration Options793276
+Node: Configuration Philosophy795039
+Node: Non-Unix Installation797403
+Node: PC Installation797868
+Node: PC Binary Installation799174
+Node: PC Compiling801017
+Node: PC Dynamic805522
+Node: PC Using807885
+Node: Cygwin812433
+Node: MSYS813417
+Node: VMS Installation813923
+Node: VMS Compilation814527
+Node: VMS Installation Details816104
+Node: VMS Running817734
+Node: VMS POSIX819331
+Node: VMS Old Gawk820629
+Node: Unsupported821098
+Node: Atari Installation821560
+Node: Atari Compiling822847
+Node: Atari Using824736
+Node: BeOS Installation827583
+Node: Tandem Installation828728
+Node: Bugs830407
+Node: Other Versions834239
+Node: Notes839461
+Node: Compatibility Mode840153
+Node: Additions840936
+Node: Adding Code841686
+Node: New Ports847738
+Node: Dynamic Extensions851870
+Node: Internals853251
+Node: Plugin License863656
+Node: Sample Library864290
+Node: Internal File Description864954
+Node: Internal File Ops868649
+Ref: Internal File Ops-Footnote-1873525
+Node: Using Internal File Ops873673
+Node: Future Extensions875698
+Node: Basic Concepts879735
+Node: Basic High Level880492
+Ref: Basic High Level-Footnote-1884608
+Node: Basic Data Typing884802
+Node: Floating Point Issues889239
+Node: String Conversion Precision890322
+Ref: String Conversion Precision-Footnote-1892016
+Node: Unexpected Results892125
+Node: POSIX Floating Point Problems893951
+Ref: POSIX Floating Point Problems-Footnote-1897650
+Node: Glossary897688
+Node: Copying921456
+Node: GNU Free Documentation License959013
+Node: next-edition984157
+Node: unresolved984509
+Node: revision985009
+Node: consistency985432
+Node: Index988785

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 28692a39..59770d5f 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -417,6 +417,7 @@ particular records in a file and perform operations upon them.
with @samp{<}, etc.
* Variable Typing:: String type versus numeric type.
* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.
* Boolean Ops:: Combining comparison expressions using
boolean operators @samp{||} (``or''),
@samp{&&} (``and'') and @samp{!} (``not'').
@@ -8938,6 +8939,7 @@ compares variables.
@menu
* Variable Typing:: String type versus numeric type.
* Comparison Operators:: The comparison operators.
+* POSIX String Comparison:: String comparison with POSIX rules.
@end menu
@node Variable Typing
@@ -9154,8 +9156,8 @@ the longer one. Thus, @code{"abc"} is less than @code{"abcd"}.
@cindex troubleshooting, @code{==} operator
It is very easy to accidentally mistype the @samp{==} operator and
-leave off one of the @samp{=} characters. The result is still valid @command{awk}
-code, but the program does not do what is intended:
+leave off one of the @samp{=} characters. The result is still valid
+@command{awk} code, but the program does not do what is intended:
@example
if (a = b) # oops! should be a == b
@@ -9258,6 +9260,31 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
@samp{!~}.
@xref{Using Constant Regexps},
where this is discussed in more detail.
+
+@node POSIX String Comparison
+@subsubsection String Comparison With POSIX Rules
+
+The POSIX standard says that string comparison is performed based
+on the locale's collating order. The results are usually very different
+from those obtained when doing straight character-by-character
+comparison.@footnote{Technically, string comparison is supposed
+to behave the same way as if the strings were compared with the C
+@code{strcoll()} function.}
+
+Because this behavior differs considerably from existing practice,
+@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
+Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
+locale:
+
+@example
+$ @kbd{gawk 'BEGIN @{ printf("ABC < abc = %s\n",}
+> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'}
+@print{} ABC < abc = TRUE
+$ @kbd{gawk --posix 'BEGIN @{ printf("ABC < abc = %s\n",}
+> @kbd{("ABC" < "abc" ? "TRUE" : "FALSE")) @}'}
+@print{} ABC < abc = FALSE
+@end example
+
@c ENDOFRANGE comex
@c ENDOFRANGE excom
@c ENDOFRANGE vartypc
diff --git a/eval.c b/eval.c
index 6c5aaeda..fea990af 100644
--- a/eval.c
+++ b/eval.c
@@ -500,6 +500,87 @@ genflags2str(int flagval, const struct flagtab *tab)
return buffer;
}
+/*
+ * posix_compare --- compare strings using strcoll() / wcscoll()
+ *
+ * Returns negative, zero, or positive as s1 collates before, equal to,
+ * or after s2. Strings with embedded NULs are compared one character
+ * at a time; if the common prefix ties, the shorter string sorts first.
+ */
+
+static int
+posix_compare(NODE *s1, NODE *s2)
+{
+	int ret = 0;
+	char save1, save2;
+	size_t l = 0;
+
+	/* temporarily NUL-terminate both strings; restored before returning */
+	save1 = s1->stptr[s1->stlen];
+	s1->stptr[s1->stlen] = '\0';
+
+	save2 = s2->stptr[s2->stlen];
+	s2->stptr[s2->stlen] = '\0';
+
+	if (gawk_mb_cur_max == 1) {
+		if (strlen(s1->stptr) == s1->stlen && strlen(s2->stptr) == s2->stlen)
+			ret = strcoll(s1->stptr, s2->stptr);
+		else {
+			/* embedded NUL: collate one character at a time */
+			char b1[2], b2[2];
+			char *p1, *p2;
+			size_t i;
+
+			l = (s1->stlen < s2->stlen ? s1->stlen : s2->stlen);
+
+			b1[1] = b2[1] = '\0';
+			for (i = ret = 0, p1 = s1->stptr, p2 = s2->stptr;
+			     ret == 0 && i < l;
+			     i++, p1++, p2++) {
+				b1[0] = *p1;
+				b2[0] = *p2;
+				ret = strcoll(b1, b2);
+			}
+			/* common prefix ties: the shorter string sorts first */
+			if (ret == 0 && s1->stlen != s2->stlen)
+				ret = (s1->stlen < s2->stlen ? -1 : 1);
+		}
+	}
+#ifdef MBS_SUPPORT
+	else {
+		/* Similar logic, using wide characters */
+		(void) force_wstring(s1);
+		(void) force_wstring(s2);
+
+		if (wcslen(s1->wstptr) == s1->wstlen && wcslen(s2->wstptr) == s2->wstlen)
+			ret = wcscoll(s1->wstptr, s2->wstptr);
+		else {
+			wchar_t b1[2], b2[2];
+			wchar_t *p1, *p2;
+			size_t i;
+
+			l = (s1->wstlen < s2->wstlen ? s1->wstlen : s2->wstlen);
+
+			b1[1] = b2[1] = L'\0';
+			for (i = ret = 0, p1 = s1->wstptr, p2 = s2->wstptr;
+			     ret == 0 && i < l;
+			     i++, p1++, p2++) {
+				b1[0] = *p1;
+				b2[0] = *p2;
+				ret = wcscoll(b1, b2);
+			}
+			/* common prefix ties: the shorter string sorts first */
+			if (ret == 0 && s1->wstlen != s2->wstlen)
+				ret = (s1->wstlen < s2->wstlen ? -1 : 1);
+		}
+	}
+#endif
+
+	s1->stptr[s1->stlen] = save1;
+	s2->stptr[s2->stlen] = save2;
+	return ret;
+}
+
/* cmp_nodes --- compare two nodes, returning negative, 0, positive */
@@ -535,6 +616,10 @@ cmp_nodes(NODE *t1, NODE *t2)
ldiff = len1 - len2;
if (len1 == 0 || len2 == 0)
return ldiff;
+
+ if (do_posix)
+ return posix_compare(t1, t2);
+
l = (ldiff <= 0 ? len1 : len2);
if (IGNORECASE) {
const unsigned char *cp1 = (const unsigned char *) t1->stptr;
diff --git a/missing_d/ChangeLog b/missing_d/ChangeLog
index 6430217f..40e4ede0 100644
--- a/missing_d/ChangeLog
+++ b/missing_d/ChangeLog
@@ -1,3 +1,9 @@
+Thu Nov 25 21:16:58 2010 Arnold D. Robbins <arnold@skeeve.com>
+
+ * strcoll.c: New file.
+ * memcmp.c, memcpy.c, memset.c, strchr.c, strtod.c: Remove
+ `register' keyword everywhere, as in mainline code.
+
Sun Jun 6 21:44:19 2010 Arnold D. Robbins <arnold@skeeve.com>
* getaddrinfo.h: Add undef for addrinfo, freeaddrinfo, and
diff --git a/missing_d/memcmp.c b/missing_d/memcmp.c
index 63cb5f8f..12b3775d 100644
--- a/missing_d/memcmp.c
+++ b/missing_d/memcmp.c
@@ -7,8 +7,8 @@
int
memcmp (s1, s2, l)
-register char *s1, *s2;
-register int l;
+char *s1, *s2;
+int l;
{
for (; l-- > 0; s1++, s2++) {
if (*s1 != *s2)
diff --git a/missing_d/memcpy.c b/missing_d/memcpy.c
index 3c4accdf..568a006a 100644
--- a/missing_d/memcpy.c
+++ b/missing_d/memcpy.c
@@ -6,10 +6,10 @@
char *
memcpy (dest, src, l)
-register char *dest, *src;
-register int l;
+char *dest, *src;
+int l;
{
- register char *ret = dest;
+ char *ret = dest;
while (l--)
*dest++ = *src++;
diff --git a/missing_d/memmove.c b/missing_d/memmove.c
index a28a32d1..4f577b38 100644
--- a/missing_d/memmove.c
+++ b/missing_d/memmove.c
@@ -21,7 +21,7 @@
/*
* August 2006. For Gawk: Borrowed from GLIBC and hacked unmercifully.
- * DON'T steal this for your own code, got straight to the GLIBC
+ * DON'T steal this for your own code, go straight to the GLIBC
* source for the original versions.
*/
diff --git a/missing_d/memset.c b/missing_d/memset.c
index 1ff4458b..0e509e85 100644
--- a/missing_d/memset.c
+++ b/missing_d/memset.c
@@ -7,11 +7,11 @@
void *
memset(dest, val, l)
void *dest;
-register int val;
-register size_t l;
+int val;
+size_t l;
{
- register char *ret = dest;
- register char *d = dest;
+ char *ret = dest;
+ char *d = dest;
while (l--)
*d++ = val;
diff --git a/missing_d/strchr.c b/missing_d/strchr.c
index 7da479fc..e549099d 100644
--- a/missing_d/strchr.c
+++ b/missing_d/strchr.c
@@ -10,7 +10,7 @@
char *
strchr(str, c)
-register const char *str, c;
+const char *str, c;
{
if (c == '\0') {
/* thanks to Mike Brennan ... */
@@ -35,9 +35,9 @@ register const char *str, c;
char *
strrchr(str, c)
-register const char *str, c;
+const char *str, c;
{
- register const char *save = NULL;
+ const char *save = NULL;
for (; *str; str++)
if (*str == c)
diff --git a/missing_d/strcoll.c b/missing_d/strcoll.c
new file mode 100644
index 00000000..ac65795e
--- /dev/null
+++ b/missing_d/strcoll.c
@@ -0,0 +1,7 @@
+/* replacement strcoll.c --- stand-in strcoll(), included by replace.c when HAVE_STRCOLL is not defined */
+
+int
+strcoll(const char *s1, const char *s2)
+{
+ return strcmp(s1, s2); /* no locale collation here: plain strcmp() ordering */
+}
diff --git a/missing_d/strtod.c b/missing_d/strtod.c
index c4f9d2bd..570f6407 100644
--- a/missing_d/strtod.c
+++ b/missing_d/strtod.c
@@ -40,8 +40,8 @@ extern double atof();
double
gawk_strtod(s, ptr)
-register const char *s;
-register const char **ptr;
+const char *s;
+const char **ptr;
{
const char *start = s; /* save original start of string */
const char *begin = NULL; /* where the number really begins */
diff --git a/protos.h b/protos.h
index 26d520cc..0a9674d9 100644
--- a/protos.h
+++ b/protos.h
@@ -106,3 +106,7 @@ extern void _exit(int);
extern SPRINTF_RET sprintf(char *, const char *, ...);
#undef aptr_t
+
+#if !defined(HAVE_STRCOLL)
+extern int strcoll(const char *, const char *);
+#endif
diff --git a/replace.c b/replace.c
index ea99822d..8ff74a87 100644
--- a/replace.c
+++ b/replace.c
@@ -100,3 +100,7 @@
#ifndef HAVE_SETENV
#include "missing_d/setenv.c"
#endif
+
+#ifndef HAVE_STRCOLL
+#include "missing_d/strcoll.c"
+#endif