diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-15 22:57:04 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-15 22:57:04 +0300 |
commit | ea2e6b8b87ba6cdd6145a844dac3938678221988 (patch) | |
tree | 6cedda623d031b208a32ba18d26c61d86c5d6411 | |
parent | c6c0baa03be793460f7a1ab90882d49134724503 (diff) | |
download | gawk-2.10-from-net.tar.gz |
Removed files from gawk-2.10 gawk-2.10-from-net gawk-2.10-from-net
-rw-r--r-- | att.getopt.c | 94 | ||||
-rw-r--r-- | gawk-info | 6151 | ||||
-rw-r--r-- | gawk-info-1 | 1231 | ||||
-rw-r--r-- | gawk-info-2 | 1265 | ||||
-rw-r--r-- | gawk-info-3 | 1385 | ||||
-rw-r--r-- | gawk-info-4 | 1400 | ||||
-rw-r--r-- | gawk-info-5 | 960 | ||||
-rw-r--r-- | gawk.1 | 1344 | ||||
-rw-r--r-- | gawk.aux | 202 | ||||
-rw-r--r-- | gawk.cp | 234 | ||||
-rw-r--r-- | gawk.cps | 253 | ||||
-rw-r--r-- | gawk.dvi | bin | 320368 -> 0 bytes | |||
-rw-r--r-- | gawk.fn | 10 | ||||
-rw-r--r-- | gawk.fns | 13 | ||||
-rw-r--r-- | gawk.ky | 0 | ||||
-rw-r--r-- | gawk.kys | 0 | ||||
-rw-r--r-- | gawk.pg | 0 | ||||
-rw-r--r-- | gawk.pgs | 0 | ||||
-rw-r--r-- | gawk.texinfo | 6587 | ||||
-rw-r--r-- | gawk.toc | 104 | ||||
-rw-r--r-- | gawk.tp | 0 | ||||
-rw-r--r-- | gawk.tps | 0 | ||||
-rw-r--r-- | gawk.vr | 17 | ||||
-rw-r--r-- | gawk.vrs | 21 | ||||
-rw-r--r-- | gnu.getopt.c | 417 | ||||
-rw-r--r-- | makefile.pc | 169 |
26 files changed, 0 insertions, 21857 deletions
diff --git a/att.getopt.c b/att.getopt.c deleted file mode 100644 index df68405f..00000000 --- a/att.getopt.c +++ /dev/null @@ -1,94 +0,0 @@ -/* -** @(#)getopt.c 2.5 (smail) 9/15/87 -*/ - -/* - * Here's something you've all been waiting for: the AT&T public domain - * source for getopt(3). It is the code which was given out at the 1985 - * UNIFORUM conference in Dallas. I obtained it by electronic mail - * directly from AT&T. The people there assure me that it is indeed - * in the public domain. - * - * There is no manual page. That is because the one they gave out at - * UNIFORUM was slightly different from the current System V Release 2 - * manual page. The difference apparently involved a note about the - * famous rules 5 and 6, recommending using white space between an option - * and its first argument, and not grouping options that have arguments. - * Getopt itself is currently lenient about both of these things. White - * space is allowed, but not mandatory, and the last option in a group can - * have an argument. That particular version of the man page evidently - * has no official existence, and my source at AT&T did not send a copy. - * The current SVR2 man page reflects the actual behavior of this getopt. - * However, I am not about to post a copy of anything licensed by AT&T. - */ - -/* This include is needed only to get "index" defined as "strchr" on Sys V. 
*/ -#ifdef MSDOS -#define index strchr -#else -#include "defs.h" -#endif - -/*LINTLIBRARY*/ -#define NULL 0 -#define EOF (-1) -#define ERR(s, c) if(opterr){\ - extern int write();\ - char errbuf[2];\ - errbuf[0] = c; errbuf[1] = '\n';\ - (void) write(2, argv[0], (unsigned)strlen(argv[0]));\ - (void) write(2, s, (unsigned)strlen(s));\ - (void) write(2, errbuf, 2);} - -extern char *index(); - -int opterr = 1; -int optind = 1; -int optopt; -char *optarg; - -int -getopt(argc, argv, opts) -int argc; -char **argv, *opts; -{ - static int sp = 1; - register int c; - register char *cp; - - if(sp == 1) - if(optind >= argc || - argv[optind][0] != '-' || argv[optind][1] == '\0') - return(EOF); - else if(strcmp(argv[optind], "--") == NULL) { - optind++; - return(EOF); - } - optopt = c = argv[optind][sp]; - if(c == ':' || (cp=index(opts, c)) == NULL) { - ERR(": illegal option -- ", c); - if(argv[optind][++sp] == '\0') { - optind++; - sp = 1; - } - return('?'); - } - if(*++cp == ':') { - if(argv[optind][sp+1] != '\0') - optarg = &argv[optind++][sp+1]; - else if(++optind >= argc) { - ERR(": option requires an argument -- ", c); - sp = 1; - return('?'); - } else - optarg = argv[optind++]; - sp = 1; - } else { - if(argv[optind][++sp] == '\0') { - sp = 1; - optind++; - } - optarg = NULL; - } - return(c); -} diff --git a/gawk-info b/gawk-info deleted file mode 100644 index 361bd0c5..00000000 --- a/gawk-info +++ /dev/null @@ -1,6151 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: Top, Next: Preface, Prev: (dir), Up: (dir) - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them; it -contains the following chapters: - -* Menu: - -* Preface:: What you can do with `awk'; brief history - and acknowledgements. - -* License:: Your right to copy and distribute `gawk'. - -* This Manual:: Using this manual. - - Includes sample input files that you can use. - -* Getting Started:: A basic introduction to using `awk'. - How to run an `awk' program. Command line syntax. - -* Reading Files:: How to read files and manipulate fields. - -* Printing:: How to print using `awk'. Describes the - `print' and `printf' statements. - Also describes redirection of output. - -* One-liners:: Short, sample `awk' programs. - -* Patterns:: The various types of patterns explained in detail. - -* Actions:: The various types of actions are introduced here. - Describes expressions and the various operators in - detail. Also describes comparison expressions. - -* Statements:: The various control statements are described in - detail. - -* Arrays:: The description and use of arrays. Also includes - array--oriented control statements. - -* User-defined:: User--defined functions are described in detail. - -* Built-in:: The built--in functions are summarized here. - -* Special:: The special variables are summarized here. 
- -* Sample Program:: A sample `awk' program with a complete explanation. - -* Notes:: Something about the implementation of `gawk'. - -* Glossary:: An explanation of some unfamiliar terms. - -* Index:: - - - -File: gawk-info, Node: Preface, Next: License, Prev: Top, Up: Top - -Preface -******* - -If you are like many computer users, you frequently would like to -make changes in various text files wherever certain patterns appear, -or extract data from parts of certain lines while discarding the -rest. To write a program to do this in a language such as C or -Pascal is a time--consuming inconvenience that may take many lines of -code. The job may be easier with `awk'. - -The `awk' utility interprets a special--purpose programming language -that makes it possible to handle simple data--reformatting jobs -easily with just a few lines of code. - -The GNU implementation of `awk' is called `gawk'; it is fully upward -compatible with the System V Release 3.1 and later versions of `awk'. -All properly written `awk' programs should work with `gawk'. So we -usually don't distinguish between `gawk' and other `awk' -implementations in this manual. - -This manual teaches you what `awk' does and how you can use `awk' -effectively. You should already be familiar with basic, -general--purpose, operating system commands such as `ls'. Using -`awk' you can: - - * manage small, personal databases, - - * generate reports, - - * validate data, - - * produce indexes, and perform other document preparation tasks, - - * even experiment with algorithms that can be adapted later to - other computer languages! - -* Menu: - -* History:: The history of gawk and awk. Acknowledgements. - - - -File: gawk-info, Node: History, Up: Preface - -History of `awk' and `gawk' -=========================== - -The name `awk' comes from the initials of its designers: Alfred V. -Aho, Peter J. Weinberger, and Brian W. Kernighan. The original -version of `awk' was written in 1977. 
In 1985 a new version made the -programming language more powerful, introducing user--defined -functions, multiple input streams, and computed regular expressions. - -The GNU implementation, `gawk', was written in 1986 by Paul Rubin and -Jay Fenlason, with advice from Richard Stallman. John Woods -contributed parts of the code as well. In 1988, David Trueman, with -help from Arnold Robbins, reworked `gawk' for compatibility with the -newer `awk'. - -Many people need to be thanked for their assistance in producing this -manual. Jay Fenlason contributed many ideas and sample programs. -Richard Mlynarik and Robert Chassell gave helpful comments on drafts -of this manual. The paper ``A Supplemental Document for `awk''' by -John W. Pierce of the Chemistry Department at UC San Diego, -pinpointed several issues relevant both to `awk' implementation and -to this manual, that would otherwise have escaped us. - -Finally, we would like to thank Brian Kernighan of Bell Labs for -invaluable assistance during the testing and debugging of `gawk', and -for help in clarifying several points about the language. - - - -File: gawk-info, Node: License, Next: This Manual, Prev: Preface, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 1, February 1989 - - Copyright (C) 1989 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble -========= - - The license agreements of most software companies try to keep users -at the mercy of those companies. By contrast, our General Public -License is intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. -The General Public License applies to the Free Software Foundation's -software and to any other program whose authors commit to using it. -You can use it for your programs, too. 
- - When we speak of free software, we are referring to freedom, not -price. Specifically, the General Public License is designed to make -sure that you have the freedom to give away or sell copies of free -software, that you receive source code or can get it if you want it, -that you can change the software or use pieces of it in new free -programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if -you distribute copies of the software, or if you modify it. - - For example, if you distribute copies of a such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must tell them their rights. - - We protect your rights with two steps: (1) copyright the software, -and (2) offer you this license which gives you legal permission to -copy, distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, -we want its recipients to know that what they have is not the -original, so that any problems introduced by others will not reflect -on the original authors' reputations. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 1. This License Agreement applies to any program or other work - which contains a notice placed by the copyright holder saying it - may be distributed under the terms of this General Public - License. 
The ``Program'', below, refers to any such program or - work, and a ``work based on the Program'' means either the - Program or any work containing the Program or a portion of it, - either verbatim or with modifications. Each licensee is - addressed as ``you''. - - 2. You may copy and distribute verbatim copies of the Program's - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an - appropriate copyright notice and disclaimer of warranty; keep - intact all the notices that refer to this General Public License - and to the absence of any warranty; and give any other - recipients of the Program a copy of this General Public License - along with the Program. You may charge a fee for the physical - act of transferring a copy. - - 3. You may modify your copy or copies of the Program or any portion - of it, and copy and distribute such modifications under the - terms of Paragraph 1 above, provided that you also do the - following: - - * cause the modified files to carry prominent notices stating - that you changed the files and the date of any change; and - - * cause the whole of any work that you distribute or publish, - that in whole or in part contains the Program or any part - thereof, either with or without modifications, to be - licensed at no charge to all third parties under the terms - of this General Public License (except that you may choose - to grant warranty protection to some or all third parties, - at your option). 
- - * If the modified program normally reads commands - interactively when run, you must cause it, when started - running for such interactive use in the simplest and most - usual way, to print or display an announcement including an - appropriate copyright notice and a notice that there is no - warranty (or else, saying that you provide a warranty) and - that users may redistribute the program under these - conditions, and telling the user how to view a copy of this - General Public License. - - * You may charge a fee for the physical act of transferring a - copy, and you may at your option offer warranty protection - in exchange for a fee. - - Mere aggregation of another independent work with the Program - (or its derivative) on a volume of a storage or distribution - medium does not bring the other work under the scope of these - terms. - - 4. You may copy and distribute the Program (or a portion or - derivative of it, under Paragraph 2) in object code or - executable form under the terms of Paragraphs 1 and 2 above - provided that you also do one of the following: - - * accompany it with the complete corresponding - machine-readable source code, which must be distributed - under the terms of Paragraphs 1 and 2 above; or, - - * accompany it with a written offer, valid for at least three - years, to give any third party free (except for a nominal - charge for the cost of distribution) a complete - machine-readable copy of the corresponding source code, to - be distributed under the terms of Paragraphs 1 and 2 above; - or, - - * accompany it with the information you received as to where - the corresponding source code may be obtained. (This - alternative is allowed only for noncommercial distribution - and only if you received the program in object code or - executable form alone.) - - Source code for a work means the preferred form of the work for - making modifications to it. 
For an executable file, complete - source code means all the source code for all modules it - contains; but, as a special exception, it need not include - source code for modules which are standard libraries that - accompany the operating system on which the executable file - runs, or for standard header files or definitions files that - accompany that operating system. - - 5. You may not copy, modify, sublicense, distribute or transfer the - Program except as expressly provided under this General Public - License. Any attempt otherwise to copy, modify, sublicense, - distribute or transfer the Program is void, and will - automatically terminate your rights to use the Program under - this License. However, parties who have received copies, or - rights to use copies, from you under this General Public License - will not have their licenses terminated so long as such parties - remain in full compliance. - - 6. By copying, distributing or modifying the Program (or any work - based on the Program) you indicate your acceptance of this - license to do so, and all its terms and conditions. - - 7. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from - the original licensor to copy, distribute or modify the Program - subject to these terms and conditions. You may not impose any - further restrictions on the recipients' exercise of the rights - granted herein. - - 8. The Free Software Foundation may publish revised and/or new - versions of the General Public License from time to time. Such - new versions will be similar in spirit to the present version, - but may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. 
If the - Program specifies a version number of the license which applies - to it and ``any later version'', you have the option of - following the terms and conditions either of that version or of - any later version published by the Free Software Foundation. If - the Program does not specify a version number of the license, - you may choose any version ever published by the Free Software - Foundation. - - 9. If you wish to incorporate parts of the Program into other free - programs whose distribution conditions are different, write to - the author to ask for permission. For software which is - copyrighted by the Free Software Foundation, write to the Free - Software Foundation; we sometimes make exceptions for this. Our - decision will be guided by the two goals of preserving the free - status of all derivatives of our free software and of promoting - the sharing and reuse of software generally. - - NO WARRANTY - - 10. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO - WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE - LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' - WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE - ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS - WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE - COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 11. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY - MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE - LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, - INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR - INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS - OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY - YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH - ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - -Appendix: How to Apply These Terms to Your New Programs -======================================================= - - If you develop a new program, and you want it to be of the greatest -possible use to humanity, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. - Copyright (C) 19YY NAME OF AUTHOR - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Also add information on how to contact you by electronic and paper -mail. - -If the program is interactive, make it output a short notice like -this when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - - The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and -`show c'; they could even be mouse-clicks or menu items--whatever -suits your program. - -You should also get your employer (if you work as a programmer) or -your school, if any, to sign a ``copyright disclaimer'' for the -program, if necessary. Here a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - program `Gnomovision' (a program to direct compilers to make passes - at assemblers) written by James Hacker. - - SIGNATURE OF TY COON, 1 April 1989 - Ty Coon, President of Vice - -That's all there is to it! - - - -File: gawk-info, Node: This Manual, Next: Getting Started, Prev: License, Up: Top - -Using This Manual -***************** - -The term `gawk' refers to a program (a version of `awk') developed by -the Free Software Foundation, and to the language you use to tell it -what to do. When we need to be careful, we call the program ``the -`awk' utility'' and the language ``the `awk' language''. The purpose -of this manual is to explain the `awk' language and how to run the -`awk' utility. - -The term "`awk' program" refers to a program written by you in the -`awk' programming language. 
- -*Note Getting Started::, for the bare essentials you need to know to -start using `awk'. - -Useful ``one--liners'' are included to give you a feel for the `awk' -language (*note One-liners::.). - -A sizable sample `awk' program has been provided for you (*note -Sample Program::.). - -If you find terms that you aren't familiar with, try looking them up -in the glossary (*note Glossary::.). - -Most of the time complete `awk' programs are used as examples, but in -some of the more advanced sections, only the part of the `awk' -program that illustrates the concept being described is shown. - -* Menu: - -This chapter contains the following sections: - -* The Files:: Sample data files for use in the `awk' programs - illustrated in this manual. - - - -File: gawk-info, Node: The Files, Up: This Manual - -Input Files for the Examples -============================ - -This manual contains many sample programs. The data for many of -those programs comes from two files. The first file, called -`BBS-list', represents a list of computer bulletin board systems and -information about those systems. - -Each line of this file is one "record". Each record contains the -name of a computer bulletin board, its phone number, the board's baud -rate, and a code for the number of hours it is operational. An `A' -in the last column means the board operates 24 hours all week. A `B' -in the last column means the board operates evening and weekend -hours, only. A `C' means the board operates only on weekends. - - aardvark 555-5553 1200/300 B - alpo-net 555-3412 2400/1200/300 A - barfly 555-7685 1200/300 A - bites 555-1675 2400/1200/300 A - camelot 555-0542 300 C - core 555-2912 1200/300 C - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sdace 555-3430 2400/1200/300 A - sabafoo 555-2127 1200/300 C - -The second data file, called `inventory-shipped', represents -information about shipments during the year. Each line of this file -is also one record. 
Each record contains the month of the year, the -number of green crates shipped, the number of red boxes shipped, the -number of orange bags shipped, and the number of blue packages -shipped, respectively. - - Jan 13 25 15 115 - Feb 15 32 24 226 - Mar 15 24 34 228 - Apr 31 52 63 420 - May 16 34 29 208 - Jun 31 42 75 492 - Jul 24 34 67 436 - Aug 15 34 47 316 - Sep 13 55 37 277 - Oct 29 54 68 525 - Nov 20 87 82 577 - Dec 17 35 61 401 - - Jan 21 36 64 620 - Feb 26 58 80 652 - Mar 24 75 70 495 - Apr 21 70 74 514 - -If you are reading this in GNU Emacs using Info, you can copy the -regions of text showing these sample files into your own test files. -This way you can try out the examples shown in the remainder of this -document. You do this by using the command `M-x write-region' to -copy text from the Info file into a file for use with `awk' (see your -``GNU Emacs Manual'' for more information). Using this information, -create your own `BBS-list' and `inventory-shipped' files, and -practice what you learn in this manual. - - - -File: gawk-info, Node: Getting Started, Next: Reading Files, Prev: This Manual, Up: Top - -Getting Started With `awk' -************************** - -The basic function of `awk' is to search files for lines (or other -units of text) that contain certain patterns. When a line matching -any of those patterns is found, `awk' performs specified actions on -that line. Then `awk' keeps processing input lines until the end of -the file is reached. - -An `awk' "program" or "script" consists of a series of "rules". -(They may also contain "function definitions", but that is an -advanced feature, so let's ignore it for now. *Note User-defined::.) - -A rule contains a "pattern", an "action", or both. Actions are -enclosed in curly braces to distinguish them from patterns. -Therefore, an `awk' program is a sequence of rules in the form: - - PATTERN { ACTION } - PATTERN { ACTION } - ... - - * Menu: - -* Very Simple:: A very simple example. 
-* Two Rules:: A less simple one--line example with two rules. -* More Complex:: A more complex example. -* Running gawk:: How to run gawk programs; includes command line syntax. -* Comments:: Adding documentation to gawk programs. -* Statements/Lines:: Subdividing or combining statements into lines. - -* When:: When to use gawk and when to use other things. - - - -File: gawk-info, Node: Very Simple, Next: Two Rules, Up: Getting Started - -A Very Simple Example -===================== - -The following command runs a simple `awk' program that searches the -input file `BBS-list' for the string of characters: `foo'. (A string -of characters is usually called, quite simply, a "string".) - - awk '/foo/ { print $0 }' BBS-list - -When lines containing `foo' are found, they are printed, because -`print $0' means print the current line. (Just `print' by itself -also means the same thing, so we could have written that instead.) - -You will notice that slashes, `/', surround the string `foo' in the -actual `awk' program. The slashes indicate that `foo' is a pattern -to search for. This type of pattern is called a "regular -expression", and is covered in more detail later (*note Regexp::.). -There are single quotes around the `awk' program so that the shell -won't interpret any of it as special shell characters. - -Here is what this program prints: - - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sabafoo 555-2127 1200/300 C - -In an `awk' rule, either the pattern or the action can be omitted, -but not both. - -If the pattern is omitted, then the action is performed for *every* -input line. - -If the action is omitted, the default action is to print all lines -that match the pattern. We could leave out the action (the print -statement and the curly braces) in the above example, and the result -would be the same: all lines matching the pattern `foo' would be -printed. 
(By comparison, omitting the print statement but retaining -the curly braces makes an empty action that does nothing; then no -lines would be printed.) - - - -File: gawk-info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up: Getting Started - -An Example with Two Rules -========================= - -The `awk' utility reads the input files one line at a time. For each -line, `awk' tries the patterns of all the rules. If several patterns -match then several actions are run, in the order in which they appear -in the `awk' program. If no patterns match, then no actions are run. - -After processing all the rules (perhaps none) that match the line, -`awk' reads the next line (however, *note Next::.). This continues -until the end of the file is reached. - -For example, the `awk' program: - - /12/ { print $0 } - /21/ { print $0 } - -contains two rules. The first rule has the string `12' as the -pattern and `print $0' as the action. The second rule has the string -`21' as the pattern and also has `print $0' as the action. Each -rule's action is enclosed in its own pair of braces. - -This `awk' program prints every line that contains the string `12' -*or* the string `21'. If a line contains both strings, it is printed -twice, once by each rule. - -If we run this program on our two sample data files, `BBS-list' and -`inventory-shipped', as shown here: - - awk '/12/ { print $0 } - /21/ { print $0 }' BBS-list inventory-shipped - -we get the following output: - - aardvark 555-5553 1200/300 B - alpo-net 555-3412 2400/1200/300 A - barfly 555-7685 1200/300 A - bites 555-1675 2400/1200/300 A - core 555-2912 1200/300 C - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sdace 555-3430 2400/1200/300 A - sabafoo 555-2127 1200/300 C - sabafoo 555-2127 1200/300 C - Jan 21 36 64 620 - Apr 21 70 74 514 - -Note how the line in `BBS-list' beginning with `sabafoo' was printed -twice, once for each rule. 
- - - -File: gawk-info, Node: More Complex, Next: Running gawk, Prev: Two Rules, Up: Getting Started - -A More Complex Example -====================== - -Here is an example to give you an idea of what typical `awk' programs -do. This example shows how `awk' can be used to summarize, select, -and rearrange the output of another utility. It uses features that -haven't been covered yet, so don't worry if you don't understand all -the details. - - ls -l | awk '$5 == "Nov" { sum += $4 } - END { print sum }' - -This command prints the total number of bytes in all the files in the -current directory that were last modified in November (of any year). -(In the C shell you would need to type a semicolon and then a -backslash at the end of the first line; in the Bourne shell you can -type the example as shown.) - -The `ls -l' part of this example is a command that gives you a full -listing of all the files in a directory, including file size and date. -Its output looks like this: - - -rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile - -rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h - -rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h - -rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y - -rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c - -rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c - -rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c - -rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c - -The first field contains read--write permissions, the second field -contains the number of links to the file, and the third field -identifies the owner of the file. The fourth field contains the size -of the file in bytes. The fifth, sixth, and seventh fields contain -the month, day, and time, respectively, that the file was last -modified. Finally, the eighth field contains the name of the file. - -The `$5 == "Nov"' in our `awk' program is an expression that tests -whether the fifth field of the output from `ls -l' matches the string -`Nov'. 
Each time a line has the string `Nov' in its fifth field, the -action `{ sum += $4 }' is performed. This adds the fourth field (the -file size) to the variable `sum'. As a result, when `awk' has -finished reading all the input lines, `sum' will be the sum of the -sizes of files whose lines matched the pattern. - -After the last line of output from `ls' has been processed, the `END' -pattern is executed, and the value of `sum' is printed. In this -example, the value of `sum' would be 80600. - -These more advanced `awk' techniques are covered in later sections -(*note Actions::.). Before you can move on to more advanced `awk' -programming, you have to know how `awk' interprets your input and -displays your output. By manipulating "fields" and using special -"print" statements, you can produce some very useful and spectacular -looking reports. - - - -File: gawk-info, Node: Running gawk, Next: Comments, Prev: More Complex, Up: Getting Started - -How to Run `awk' Programs -========================= - -There are several ways to run an `awk' program. If the program is -short, it is easiest to include it in the command that runs `awk', -like this: - - awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ... - - where PROGRAM consists of a series of PATTERNS and ACTIONS, as -described earlier. - -When the program is long, you would probably prefer to put it in a -file and run it with a command like this: - - awk -f PROGRAM-FILE INPUT-FILE1 INPUT-FILE2 ... - - * Menu: - -* One-shot:: Running a short throw--away `awk' program. -* Read Terminal:: Using no input files (input from terminal instead). -* Long:: Putting permanent `awk' programs in files. -* Executable Scripts:: Making self--contained `awk' programs. -* Command Line:: How the `awk' command line is laid out. 
- - - -File: gawk-info, Node: One-shot, Next: Read Terminal, Up: Running gawk - -One--shot Throw--away `awk' Programs ------------------------------------- - -Once you are familiar with `awk', you will often type simple programs -at the moment you want to use them. Then you can write the program -as the first argument of the `awk' command, like this: - - awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ... - - where PROGRAM consists of a series of PATTERNS and ACTIONS, as -described earlier. - -This command format tells the shell to start `awk' and use the -PROGRAM to process records in the input file(s). There are single -quotes around the PROGRAM so that the shell doesn't interpret any -`awk' characters as special shell characters. They cause the shell -to treat all of PROGRAM as a single argument for `awk'. They also -allow PROGRAM to be more than one line long. - -This format is also useful for running short or medium--sized `awk' -programs from shell scripts, because it avoids the need for a -separate file for the `awk' program. A self--contained shell script -is more reliable since there are no other files to misplace. - - - -File: gawk-info, Node: Read Terminal, Next: Long, Prev: One-shot, Up: Running gawk - -Running `awk' without Input Files ---------------------------------- - -You can also use `awk' without any input files. If you type the -command line: - - awk 'PROGRAM' - -then `awk' applies the PROGRAM to the "standard input", which usually -means whatever you type on the terminal. This continues until you -indicate end--of--file by typing `Control-d'. - -For example, if you type: - - awk '/th/' - -whatever you type next will be taken as data for that `awk' program. -If you go on to type the following data, - - Kathy - Ben - Tom - Beth - Seth - Karen - Thomas - `Control-d' - -then `awk' will print - - Kathy - Beth - Seth - -as matching the pattern `th'. Notice that it did not recognize -`Thomas' as matching the pattern. 
The `awk' language is "case -sensitive", and matches patterns *exactly*. - - - -File: gawk-info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk - -Running Long Programs ---------------------- - -Sometimes your `awk' programs can be very long. In this case it is -more convenient to put the program into a separate file. To tell -`awk' to use that file for its program, you type: - - awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ... - - The `-f' tells the `awk' utility to get the `awk' program from the -file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For -example, you could put the program: - - /th/ - -into the file `th-prog'. Then the command: - - awk -f th-prog - -does the same thing as this one: - - awk '/th/' - -which was explained earlier (*note Read Terminal::.). Note that you -don't usually need single quotes around the file name that you -specify with `-f', because most file names don't contain any of the -shell's special characters. - -If you want to identify your `awk' program files clearly as such, you -can add the extension `.awk' to the filename. This doesn't affect -the execution of the `awk' program, but it does make ``housekeeping'' -easier. - - - -File: gawk-info, Node: Executable Scripts, Next: Command Line, Prev: Long, Up: Running gawk - -Executable `awk' Programs -------------------------- - -(The following section assumes that you are already somewhat familiar -with `awk'.) - -Once you have learned `awk', you may want to write self--contained -`awk' scripts, using the `#!' script mechanism. You can do this on -BSD Unix systems and GNU. - -For example, you could create a text file named `hello', containing -the following (where `BEGIN' is a feature we have not yet discussed): - - #! 
/bin/awk -f - - # a sample awk program - - BEGIN { print "hello, world" } - -After making this file executable (with the `chmod' command), you can -simply type: - - hello - -at the shell, and the system will arrange to run `awk' as if you had -typed: - - awk -f hello - -Self--contained `awk' scripts are particularly useful for putting -`awk' programs into production on your system, without your users -having to know that they are actually using an `awk' program. - -If your system does not support the `#!' mechanism, you can get a -similar effect using a regular shell script. It would look something -like this: - - : a sample awk program - - awk 'PROGRAM' "$@" - -Using this technique, it is *vital* to enclose the PROGRAM in single -quotes to protect it from interpretation by the shell. If you omit -the quotes, only a shell wizard can predict the result. - -The `"$@"' causes the shell to forward all the command line arguments -to the `awk' program, without interpretation. - - - -File: gawk-info, Node: Command Line, Prev: Executable Scripts, Up: Running gawk - -Details of the `awk' Command Line ---------------------------------- - -(The following section assumes that you are already familiar with -`awk'.) - -There are two ways to run `awk'. Here are templates for both of -them; items enclosed in `[' and `]' in these templates are optional. - - awk [ -FFS ] [ -- ] 'PROGRAM' FILE ... - awk [ -FFS ] -f SOURCE-FILE [ -f SOURCE-FILE ... ] [ -- ] FILE ... - - Options begin with a minus sign, and consist of a single character. -The options and their meanings are as follows: - -`-FFS' - This sets the `FS' variable to FS (*note Special::.). As a - special case, if FS is `t', then `FS' will be set to the tab - character (`"\t"'). - -`-f SOURCE-FILE' - Indicates that the `awk' program is to be found in SOURCE-FILE - instead of in the first non--option argument. - -`--' - This signals the end of the command line options. 
If you wish - to specify an input file named `-f', you can precede it with the - `--' argument to prevent the `-f' from being interpreted as an - option. This handling of `--' follows the POSIX argument - parsing conventions. - -Any other options will be flagged as invalid with a warning message, -but are otherwise ignored. - -If the `-f' option is *not* used, then the first non--option command -line argument is expected to be the program text. - -The `-f' option may be used more than once on the command line. -`awk' will read its program source from all of the named files, as if -they had been concatenated together into one big file. This is -useful for creating libraries of `awk' functions. Useful functions -can be written once, and then retrieved from a standard place, -instead of having to be included into each individual program. You -can still type in a program at the terminal and use library -functions, by specifying `/dev/tty' as one of the arguments to a -`-f'. Type your program, and end it with the keyboard end--of--file -character `Control-d'. - -Any additional arguments on the command line are made available to -your `awk' program in the `ARGV' array (*note Special::.). These -arguments are normally treated as input files to be processed in the -order specified. However, an argument that has the form VAR`='VALUE, -means to assign the value VALUE to the variable VAR--it does not -specify a file at all. - -Command line options and the program text (if present) are omitted -from the `ARGV' array. All other arguments, including variable -assignments, are included (*note Special::.). - -The distinction between file name arguments and variable--assignment -arguments is made when `awk' is about to open the next input file. -At that point in execution, it checks the ``file name'' to see -whether it is really a variable assignment; if so, instead of trying -to read a file it will, *at that point in the execution*, assign the -variable. 
- -Therefore, the variables actually receive the specified values after -all previously specified files have been read. In particular, the -values of variables assigned in this fashion are *not* available -inside a `BEGIN' rule (*note BEGIN/END::.), since such rules are run -before `awk' begins scanning the argument list. - -The variable assignment feature is most useful for assigning to -variables such as `RS', `OFS', and `ORS', which control input and -output formats, before listing the data files. It is also useful for -controlling state if multiple passes are needed over a data file. -For example: - - awk 'pass == 1 { PASS 1 STUFF } - pass == 2 { PASS 2 STUFF }' pass=1 datafile pass=2 datafile - - - -File: gawk-info, Node: Comments, Next: Statements/Lines, Prev: Running gawk, Up: Getting Started - -Comments in `awk' Programs -========================== - -When you write a complicated `awk' program, you can put "comments" in -the program file to help you remember what the program does, and how -it works. - -A comment starts with the sharp sign character, `#', and -continues to the end of the line. The `awk' language ignores the -rest of a line following a sharp sign. For example, we could have -put the following into `th-prog': - - # This program finds records containing the pattern `th'. This is how - # you continue comments on additional lines. - /th/ - -You can put comment lines into keyboard--composed throw--away `awk' -programs also, but this usually isn't very useful; the purpose of a -comment is to help yourself or another person understand the program -at another time. 
- - - -File: gawk-info, Node: Statements/Lines, Next: When, Prev: Comments, Up: Getting Started - -`awk' Statements versus Lines -============================= - -Most often, each line in an `awk' program is a separate statement or -separate rule, like this: - - awk '/12/ { print $0 } - /21/ { print $0 }' BBS-list inventory-shipped - -But sometimes statements can be more than one line, and lines can -contain several statements. - -You can split a statement into multiple lines by inserting a newline -after any of the following: - - , { ? : || && - -Lines ending in `do' or `else' automatically have their statements -continued on the following line(s). A newline at any other point -ends the statement. - -If you would like to split a single statement into two lines at a -point where a newline would terminate it, you can "continue" it by -ending the first line with a backslash character, `\'. This is -allowed absolutely anywhere in the statement, even in the middle of a -string or regular expression. For example: - - awk '/This program is too long, so continue it\ - on the next line/ { print $1 }' - -We have generally not used backslash continuation in the sample -programs in this manual. Since there is no limit on the length of a -line, it is never strictly necessary; it just makes programs -prettier. We have preferred to make them even more pretty by keeping -the statements short. Backslash continuation is most useful when -your `awk' program is in a separate source file, instead of typed in -on the command line. - -*Warning: this does not work if you are using the C shell.* -Continuation with backslash works for `awk' programs in files, and -also for one--shot programs *provided* you are using the Bourne -shell, the Korn shell, or the Bourne--again shell. But the C shell -used on Berkeley Unix behaves differently! There, you must use two -backslashes in a row, followed by a newline. 
- -When `awk' statements within one rule are short, you might want to -put more than one of them on a line. You do this by separating the -statements with semicolons, `;'. This also applies to the rules -themselves. Thus, the above example program could have been written: - - /12/ { print $0 } ; /21/ { print $0 } - -*Note:* It is a new requirement that rules on the same line require -semicolons as a separator in the `awk' language; it was done for -consistency with the statements in the action part of rules. - - - -File: gawk-info, Node: When, Prev: Statements/Lines, Up: Getting Started - -When to Use `awk' -================= - -What use is all of this to me, you might ask? Using additional -operating system utilities, more advanced patterns, field separators, -arithmetic statements, and other selection criteria, you can produce -much more complex output. The `awk' language is very useful for -producing reports from large amounts of raw data, like summarizing -information from the output of standard operating system programs -such as `ls'. (*Note A More Complex Example: More Complex.) - -Programs written with `awk' are usually much smaller than they would -be in other languages. This makes `awk' programs easy to compose and -use. Often `awk' programs can be quickly composed at your terminal, -used once, and thrown away. Since `awk' programs are interpreted, -you can avoid the usually lengthy edit--compile--test--debug cycle of -software development. - -Complex programs have been written in `awk', including a complete -retargetable assembler for 8--bit microprocessors (*note Glossary::. -for more information) and a microcode assembler for a special purpose -Prolog computer. However, `awk''s capabilities are strained by tasks -of such complexity. - -If you find yourself writing `awk' scripts of more than, say, a few -hundred lines, you might consider using a different programming -language. 
Emacs Lisp is a good choice if you need sophisticated -string or pattern matching capabilities. The shell is also good at -string and pattern matching; in addition it allows powerful use of -the standard utilities. More conventional languages like C, C++, or -Lisp offer better facilities for system programming and for managing -the complexity of large programs. Programs in these languages may -require more lines of source code than the equivalent `awk' programs, -but they will be easier to maintain and usually run more efficiently. - - - -File: gawk-info, Node: Reading Files, Next: Printing, Prev: Getting Started, Up: Top - -Reading Files (Input) -********************* - -In the typical `awk' program, all input is read either from the -standard input (usually the keyboard) or from files whose names you -specify on the `awk' command line. If you specify input files, `awk' -reads data from the first one until it reaches the end; then it reads -the second file until it reaches the end, and so on. The name of the -current input file can be found in the special variable `FILENAME' -(*note Special::.). - -The input is split automatically into "records", and processed by the -rules one record at a time. (Records are the units of text mentioned -in the introduction; by default, a record is a line of text.) Each -record read is split automatically into "fields", to make it more -convenient for a rule to work on parts of the record under -consideration. - -On rare occasions you will need to use the `getline' command, which -can do explicit input from any number of files. - -* Menu: - -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Field Separators:: The field separator and how to change it. -* Multiple:: Reading multi--line records. - -* Assignment Options:: Setting variables on the command line and a summary - of command line syntax. This is an advanced method - of input. 
- -* Getline:: Reading files under explicit program control - using the `getline' function. -* Close Input:: Closing an input file (so you can read from - the beginning once more). - - - -File: gawk-info, Node: Records, Next: Fields, Up: Reading Files - -How Input is Split into Records -=============================== - -The `awk' language divides its input into records and fields. -Records are separated from each other by the "record separator". By -default, the record separator is the "newline" character. Therefore, -normally, a record is a line of text. - -Sometimes you may want to use a different character to separate your -records. You can use different characters by changing the special -variable `RS'. - -The value of `RS' is a string that says how to separate records; the -default value is `"\n"', the string of just a newline character. -This is why lines of text are the default record. Although `RS' can -have any string as its value, only the first character of the string -will be used as the record separator. The other characters are -ignored. `RS' is exceptional in this regard; `awk' uses the full -value of all its other special variables. - -The value of `RS' is changed by "assigning" it a new value (*note -Assignment Ops::.). One way to do this is at the beginning of your -`awk' program, before any input has been processed, using the special -`BEGIN' pattern (*note BEGIN/END::.). This way, `RS' is changed to -its new value before any input is read. The new value of `RS' is -enclosed in quotation marks. For example: - - awk 'BEGIN { RS = "/" } ; { print $0 }' BBS-list - -changes the value of `RS' to `/', the slash character, before reading -any input. Records are now separated by a slash. The second rule in -the `awk' program (the action with no pattern) will proceed to print -each record. Since each `print' statement adds a newline at the end -of its output, the effect of this `awk' program is to copy the input -with each slash changed to a newline. 
- -Another way to change the record separator is on the command line, -using the variable--assignment feature (*note Command Line::.). - - awk '...' RS="/" SOURCE-FILE - -`RS' will be set to `/' before processing SOURCE-FILE. - -The empty string (a string of no characters) has a special meaning as -the value of `RS': it means that records are separated only by blank -lines. *Note Multiple::, for more details. - -The `awk' utility keeps track of the number of records that have been -read so far from the current input file. This value is stored in a -special variable called `FNR'. It is reset to zero when a new file -is started. Another variable, `NR', is the total number of input -records read so far from all files. It starts at zero but is never -automatically reset to zero. - -If you change the value of `RS' in the middle of an `awk' run, the -new value is used to delimit subsequent records, but the record -currently being processed (and records already finished) are not -affected. - - - -File: gawk-info, Node: Fields, Next: Non-Constant Fields, Prev: Records, Up: Reading Files - -Examining Fields -================ - -When `awk' reads an input record, the record is automatically -separated or "parsed" by the interpreter into pieces called "fields". -By default, fields are separated by whitespace, like words in a line. -Whitespace in `awk' means any string of one or more spaces and/or -tabs; other characters such as newline, formfeed, and so on, that are -considered whitespace by other languages are *not* considered -whitespace by `awk'. - -The purpose of fields is to make it more convenient for you to refer -to these pieces of the record. You don't have to use them--you can -operate on the whole record if you wish--but fields are what make -simple `awk' programs so powerful. - -To refer to a field in an `awk' program, you use a dollar--sign, `$', -followed by the number of the field you want. Thus, `$1' refers to -the first field, `$2' to the second, and so on. 
For example, suppose -the following is a line of input: - - This seems like a pretty nice example. - - Here the first field, or `$1', is `This'; the second field, or `$2', -is `seems'; and so on. Note that the last field, `$7', is -`example.'. Because there is no space between the `e' and the `.', -the period is considered part of the seventh field. - -No matter how many fields there are, the last field in a record can -be represented by `$NF'. So, in the example above, `$NF' would be -the same as `$7', which is `example.'. Why this works is explained -below (*note Non-Constant Fields::.). If you try to refer to a field -beyond the last one, such as `$8' when the record has only 7 fields, -you get the empty string. - -Plain `NF', with no `$', is a special variable whose value is the -number of fields in the current record. - -`$0', which looks like an attempt to refer to the zeroth field, is a -special case: it represents the whole input record. This is what you -would use when you aren't interested in fields. - -Here are some more examples: - - awk '$1 ~ /foo/ { print $0 }' BBS-list - -This example contains the "matching" operator `~' (*note Comparison -Ops::.). Using this operator, all records in the file `BBS-list' -whose first field contains the string `foo' are printed. - -By contrast, the following example: - - awk '/foo/ { print $1, $NF }' BBS-list - -looks for the string `foo' in *the entire record* and prints the -first field and the last field for each input record containing the -pattern. - -The following program will search the system password file, and print -the entries for users who have no password. - - awk -F: '$2 == ""' /etc/passwd - -This program uses the `-F' option on the command line to set the field -separator. (Fields in `/etc/passwd' are separated by colons. The -second field represents a user's encrypted password, but if the field -is empty, that user has no password.) 
- - - -File: gawk-info, Node: Non-Constant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files - -Non-constant Field Numbers -========================== - -The number of a field does not need to be a constant. Any expression -in the `awk' language can be used after a `$' to refer to a field. -The `awk' utility evaluates the expression and uses the "numeric -value" as a field number. Consider this example: - - awk '{ print $NR }' - -Recall that `NR' is the number of records read so far: 1 in the first -record, 2 in the second, etc. So this example will print the first -field of the first record, the second field of the second record, and -so on. For the twentieth record, field number 20 will be printed; -most likely this will make a blank line, because the record will not -have 20 fields. - -Here is another example of using expressions as field numbers: - - awk '{ print $(2*2) }' BBS-list - -The `awk' language must evaluate the expression `(2*2)' and use its -value as the field number to print. The `*' sign represents -multiplication, so the expression `2*2' evaluates to 4. This -example, then, prints the hours of operation (the fourth field) for -every line of the file `BBS-list'. - -When you use non--constant field numbers, you may ask for a field -with a negative number. This always results in an empty string, just -like a field whose number is too large for the input record. For -example, `$(1-4)' would try to examine field number -3; it would -result in an empty string. - -If the field number you compute is zero, you get the entire record. - -The number of fields in the current record is stored in the special -variable `NF' (*note Special::.). The expression `$NF' is not a -special feature: it is the direct consequence of evaluating `NF' and -using its value as a field number. 
- - - -File: gawk-info, Node: Changing Fields, Next: Field Separators, Prev: Non-Constant Fields, Up: Reading Files - -Changing the Contents of a Field -================================ - -You can change the contents of a field as seen by `awk' within an -`awk' program; this changes what `awk' perceives as the current input -record. (The actual input is untouched: `awk' never modifies the -input file.) - -Look at this example: - - awk '{ $3 = $2 - 10; print $2, $3 }' inventory-shipped - -The `-' sign represents subtraction, so this program reassigns field -three, `$3', to be the value of field two minus ten, ``$2' - 10'. -(*Note Arithmetic Ops::.) Then field two, and the new value for -field three, are printed. - -In order for this to work, the text in field `$2' must make sense as -a number; the string of characters must be converted to a number in -order for the computer to do arithmetic on it. The number resulting -from the subtraction is converted back to a string of characters -which then becomes field 3. *Note Conversion::. - -When you change the value of a field (as perceived by `awk'), the -text of the input record is recalculated to contain the new field -where the old one was. `$0' will from that time on reflect the -altered field. Thus, - - awk '{ $2 = $2 - 10; print $0 }' inventory-shipped - -will print a copy of the input file, with 10 subtracted from the -second field of each line. - -You can also assign contents to fields that are out of range. For -example: - - awk '{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 }' inventory-shipped - -We've just created `$6', whose value is the average of fields `$2', -`$3', `$4', and `$5'. The `+' sign represents addition, and the `/' -sign represents division. For the file `inventory-shipped' `$6' -represents the average number of parcels shipped for a particular -month. - -Creating a new field changes what `awk' interprets as the current -input record. The value of `$0' will be recomputed. 
This -recomputation affects and is affected by features not yet discussed, -in particular, the "Output Field Separator", `OFS', which is used to -separate the fields (*note Output Separators::.), and `NF' (the -number of fields; *note Fields::.). For example, the value of `NF' -will be set to the number of the highest out--of--range field you -create. - -Note, however, that merely *referencing* an out--of--range field will -*not* change the value of either `$0' or `NF'. Referencing an -out--of--range field merely produces a null string. For example: - - if ($(NF+1) != "") - print "can't happen" - else - print "everything is normal" - -should print `everything is normal'. (*Note If::, for more -information about `awk''s `if-else' statements.) - - - -File: gawk-info, Node: Field Separators, Next: Multiple, Prev: Changing Fields, Up: Reading Files - -Specifying How Fields Are Separated -=================================== - -You can change the way `awk' splits a record into fields by changing -the value of the "field separator". The field separator is -represented by the special variable `FS' in an `awk' program, and can -be set by `-F' on the command line. The `awk' language scans each -input line for the field separator character to determine the -positions of fields within that line. Shell programmers take note! -`awk' uses the variable `FS', not `IFS'. - -The default value of the field separator is a string containing a -single space. This value is actually a special case; as you know, by -default, fields are separated by whitespace sequences, not by single -spaces: two spaces in a row do not delimit an empty field. -``Whitespace'' is defined as sequences of one or more spaces or tab -characters. - -You change the value of `FS' by "assigning" it a new value. You can -do this using the special `BEGIN' pattern (*note BEGIN/END::.). This -pattern allows you to change the value of `FS' before any input is -read. The new value of `FS' is enclosed in quotations. 
For example, -set the value of `FS' to the string `","': - - awk 'BEGIN { FS = "," } ; { print $2 }' - -and use the input line: - - John Q. Smith, 29 Oak St., Walamazoo, MI 42139 - -This `awk' program will extract the string `29 Oak St.'. - -Sometimes your input data will contain separator characters that -don't separate fields the way you thought they would. For instance, -the person's name in the example we've been using might have a title -or suffix attached, such as `John Q. Smith, LXIX'. If you assigned -`FS' to be `,' then: - - awk 'BEGIN { FS = "," } ; { print $2 }' - -would extract `LXIX', instead of `29 Oak St.'. If you were expecting -the program to print the address, you would be surprised. So, choose -your data layout and separator characters carefully to prevent -problems like this from happening. - -You can assign `FS' to be a series of characters. For example, the -assignment: - - FS = ", \t" - -makes every area of an input line that consists of a comma followed -by a space and a tab, into a field separator. (`\t' stands for a tab.) - -If `FS' is any single character other than a blank, then that -character is used as the field separator, and two successive -occurrences of that character do delimit an empty field. - -If you assign `FS' to a string longer than one character, that string -is evaluated as a "regular expression" (*note Regexp::.). The value -of the regular expression is used as a field separator. - -`FS' can be set on the command line. You use the `-F' argument to do -so. For example: - - awk -F, 'PROGRAM' INPUT-FILES - -sets `FS' to be the `,' character. Notice that the argument uses a -capital `F'. Contrast this with `-f', which specifies a file -containing an `awk' program. Case is significant in command options: -the `-F' and `-f' options have nothing to do with each other. You -can use both options at the same time to set the `FS' argument *and* -get an `awk' program from a file. 
- -As a special case, if the argument to `-F' is `t', then `FS' is set -to the tab character. (This is because if you type `-F\t', without -the quotes, at the shell, the `\' gets deleted, so `awk' figures that -you really want your fields to be separated with tabs, and not `t's. -Use `FS="t"' if you really do want to separate your fields with `t's.) - -For example, let's use an `awk' program file called `baud.awk' that -contains the pattern `/300/', and the action `print $1'. We'll use -the operating system utility `cat' to ``look'' at our program: - - % cat baud.awk - /300/ { print $1 } - -Let's also set `FS' to be the `-' character. We will apply all this -information to the file `BBS-list'. This `awk' program will now -print a list of the names of the bulletin boards that operate at 300 -baud and the first three digits of their phone numbers. - - awk -F- -f baud.awk BBS-list - -produces this output: - - aardvark 555 - alpo - barfly 555 - bites 555 - camelot 555 - core 555 - fooey 555 - foot 555 - macfoo 555 - sdace 555 - sabafoo 555 - -Note the second line of output. If you check the original file, you -will see that the second line looked like this: - - alpo-net 555-3412 2400/1200/300 A - -The `-' as part of the system's name was used as the field separator, -instead of the `-' in the phone number that was originally intended. -This demonstrates why you have to be careful in choosing your field -and record separators. - - - -File: gawk-info, Node: Multiple, Next: Assignment Options, Prev: Field Separators, Up: Reading Files - -Multiple--Line Records -====================== - -In some data bases, a single line cannot conveniently hold all the -information in one entry. Then you will want to use multi--line -records. - -The first step in doing this is to choose your data format: when -records are not defined as single lines, how will you want to define -them? What should separate records? 
- -One technique is to use an unusual character or string to separate -records. For example, you could use the formfeed character (written -`\f' in `awk', as in C) to separate them, making each record a page -of the file. To do this, just set the variable `RS' to `"\f"' (a -string containing the formfeed character), or whatever string you -prefer to use. - -Another technique is to have blank lines separate records. By a -special dispensation, a null string as the value of `RS' indicates -that records are separated by one or more blank lines. If you set -`RS' to the null string, a record will always end at the first blank -line encountered. And the next record won't start until the first -nonblank line that follows--no matter how many blank lines appear in -a row, they will be considered one record--separator. - -The second step is to separate the fields in the record. One way to -do this is to put each field on a separate line: to do this, just set -the variable `FS' to the string `"\n"'. (This simple regular -expression matches a single newline.) Another idea is to divide each -of the lines into fields in the normal manner; the regular expression -`"[ \t\n]+"' will do this nicely by treating the newlines inside the -record just like spaces. - -When `RS' is set to the null string, the newline character *always* -acts as a field separator. This is in addition to whatever value -`FS' has. The probable reason for this rule is so that you get -rational behavior in the default case (i.e. `FS == " "'). This can -be a problem if you really don't want the newline character to -separate fields, since there is no way to do that. However, you can -work around this by using the `split' function to manually break up -your data (*note String Functions::.). 
- -Here is how to use records separated by blank lines and break each -line into fields normally: - - awk 'BEGIN { RS = ""; FS = "[ \t\n]+" } ; { print $0 }' BBS-list - - - -File: gawk-info, Node: Assignment Options, Next: Getline, Prev: Multiple, Up: Reading Files - -Assigning Variables on the Command Line -======================================= - -You can include variable "assignments" among the file names on the -command line used to invoke `awk' (*note Command Line::.). Such -assignments have the form: - - VARIABLE=TEXT - -and allow you to change variables either at the beginning of the -`awk' run or in between input files. The variable assignment is -performed at a time determined by its position among the input file -arguments: after the processing of the preceding input file argument. -For example: - - awk '{ print $n }' n=4 inventory-shipped n=2 BBS-list - -prints the value of field number `n' for all input records. Before -the first file is read, the command line sets the variable `n' equal -to 4. This causes the fourth field of the file `inventory-shipped' -to be printed. After the first file has finished, but before the -second file is started, `n' is set to 2, so that the second field of -the file `BBS-list' will be printed. - -Command line arguments are made available for explicit examination by -the `awk' program in an array named `ARGV' (*note Special::.). - - - -File: gawk-info, Node: Getline, Prev: Assignment Options, Up: Reading Files - -Explicit Input with `getline' -============================= - -So far we have been getting our input files from `awk''s main input -stream--either the standard input (usually your terminal) or the -files specified on the command line. The `awk' language has a -special built--in function called `getline' that can be used to read -input under your explicit control. - -This command is quite complex and should *not* be used by beginners. 
-The command (and its variations) is covered here because this is the -section about input. The examples that follow the explanation of the -`getline' command include material that has not been covered yet. -Therefore, come back and attempt the `getline' command *after* you -have reviewed the rest of this manual and have a good knowledge of -how `awk' works. - -When retrieving input, `getline' returns a 1 if it found a record, -and a 0 if the end of the file was encountered. If there was some -error in getting a record, such as a file that could not be opened, -then `getline' returns a -1. - -In the following examples, COMMAND stands for a string value that -represents a shell command. - -`getline' - The `getline' function can be used by itself, in an `awk' - program, to read input from the current input. All it does in - this case is read the next input record and split it up into - fields. This is useful if you've finished processing the - current record, but you want to do some special processing - *right now* on the next record. Here's an example: - - awk '{ - if (t = index($0, "/*")) { - if(t > 1) - tmp = substr($0, 1, t - 1) - else - tmp = "" - u = index(substr($0, t + 2), "*/") - while (! u) { - getline - t = -1 - u = index($0, "*/") - } - if(u <= length($0) - 2) - $0 = tmp substr($0, t + u + 3) - else - $0 = tmp - } - print $0 - }' - - This `awk' program deletes all comments, `/* ... */', from the - input. By replacing the `print $0' with other statements, you - could perform more complicated processing on the de--commented - input, such as search it for matches for a regular expression. - - This form of the `getline' command sets `NF' (the number of - fields; *note Fields::.), `NR' (the number of records read so - far), the `FNR' variable (*note Records::.), and the value of - `$0'. - - *Note:* The new value of `$0' will be used in testing the - patterns of any subsequent rules. 
The original value of `$0' - that triggered the rule which executed `getline' is lost. By - contrast, the `next' statement reads a new record but - immediately begins processing it normally, starting with the - first rule in the program. *Note Next::. - -`getline VAR' - This form of `getline' reads a record into the variable VAR. - This is useful when you want your program to read the next - record from the input file, but you don't want to subject the - record to the normal input processing. - - For example, suppose the next line is a comment, or a special - string, and you want to read it, but you must make certain that - it won't accidentally trigger any rules. This version of - `getline' will allow you to read that line and store it in a - variable so that the main read--a--line--and--check--each--rule - loop of `awk' never sees it. - - The following example swaps every two lines of input. For - example, given: - - wan - tew - free - phore - - it outputs: - - tew - wan - phore - free - - Here's the program: - - awk '{ - if ((getline tmp) > 0) { - print tmp - print $0 - } else - print $0 - }' - - The `getline' function used in this way sets only `NR' and `FNR' - (and of course, VAR). The record is not split into fields, so - the values of the fields (including `$0') and the value of `NF' - do not change. - -`getline < FILE' - This form of the `getline' function takes its input from the - file FILE. Here FILE is a string--valued expression that - specifies the file name. - - This form is useful if you want to read your input from a - particular file, instead of from the main input stream. For - example, the following program reads its input record from the - file `foo.input' when it encounters a first field with a value - equal to 10 in the current input file. - - awk '{ - if ($1 == 10) { - getline < "foo.input" - print - } else - print - }' - - Since the main input stream is not used, the values of `NR' and - `FNR' are not changed. 
But the record read is split into fields - in the normal manner, so the values of `$0' and other fields are - changed. So is the value of `NF'. - - This does not cause the record to be tested against all the - patterns in the `awk' program, in the way that would happen if - the record were read normally by the main processing loop of - `awk'. However the new record is tested against any subsequent - rules, just as when `getline' is used without a redirection. - -`getline VAR < FILE' - This form of the `getline' function takes its input from the - file FILE and puts it in the variable VAR. As above, FILE is a - string--valued expression that specifies the file to read from. - - In this version of `getline', none of the built--in variables - are changed, and the record is not split into fields. The only - variable changed is VAR. - - For example, the following program copies all the input files to - the output, except for records that say `@include FILENAME'. - Such a record is replaced by the contents of the file FILENAME. - - awk '{ - if (NF == 2 && $1 == "@include") { - while ((getline line < $2) > 0) - print line - close($2) - } else - print - }' - - Note here how the name of the extra input file is not built into - the program; it is taken from the data, from the second field on - the `@include' line. - - The `close' command is used to ensure that if two identical - `@include' lines appear in the input, the entire specified file - is included twice. *Note Close Input::. - - One deficiency of this program is that it does not process - nested `@include' statements the way a true macro preprocessor - would. - -`COMMAND | getline' - You can "pipe" the output of a command into `getline'. A pipe - is simply a way to link the output of one program to the input - of another. In this case, the string COMMAND is run as a shell - command and its output is piped into `awk' to be used as input. - This form of `getline' reads one record from the pipe. 
- - For example, the following program copies input to output, - except for lines that begin with `@execute', which are replaced - by the output produced by running the rest of the line as a - shell command: - - awk '{ - if ($1 == "@execute") { - tmp = substr($0, 10) - while ((tmp | getline) > 0) - print - close(tmp) - } else - print - }' - - The `close' command is used to ensure that if two identical - `@execute' lines appear in the input, the command is run again - for each one. *Note Close Input::. - - Given the input: - - foo - bar - baz - @execute who - bletch - - the program might produce: - - foo - bar - baz - hack ttyv0 Jul 13 14:22 - hack ttyp0 Jul 13 14:23 (gnu:0) - hack ttyp1 Jul 13 14:23 (gnu:0) - hack ttyp2 Jul 13 14:23 (gnu:0) - hack ttyp3 Jul 13 14:23 (gnu:0) - bletch - - Notice that this program ran the command `who' and printed the - result. (If you try this program yourself, you will get - different results, showing you logged in.) - - This variation of `getline' splits the record into fields, sets - the value of `NF' and recomputes the value of `$0'. The values - of `NR' and `FNR' are not changed. - -`COMMAND | getline VAR' - The output of the command COMMAND is sent through a pipe to - `getline' and into the variable VAR. For example, the following - program reads the current date and time into the variable - `current_time', using the utility called `date', and then prints - it. - - awk 'BEGIN { - "date" | getline current_time - close("date") - print "Report printed on " current_time - }' - - In this version of `getline', none of the built--in variables - are changed, and the record is not split into fields. - - - -File: gawk-info, Node: Close Input, Up: Getline - -Closing Input Files -------------------- - -If the same file name or the same shell command is used with -`getline' more than once during the execution of the `awk' program, -the file is opened (or the command is executed) only the first time. 
-At that time, the first record of input is read from that file or -command. The next time the same file or command is used in -`getline', another record is read from it, and so on. - -What this implies is that if you want to start reading the same file -again from the beginning, or if you want to rerun a shell command -(rather than reading more output from the command), you must take -special steps. What you can do is use the `close' statement: - - close (FILENAME) - -This statement closes a file or pipe, represented here by FILENAME. -The string value of FILENAME must be the same value as the string -used to open the file or pipe to begin with. - -Once this statement is executed, the next `getline' from that file or -command will reopen the file or rerun the command. - - - -File: gawk-info, Node: Printing, Next: One-liners, Prev: Reading Files, Up: Top - -Printing Output -*************** - -One of the most common things that actions do is to output or "print" -some or all of the input. For simple output, use the `print' -statement. For fancier formatting use the `printf' statement. Both -are described in this chapter. - -* Menu: - -* Print:: The `print' statement. -* Print Examples:: Simple examples of `print' statements. -* Output Separators:: The output separators and how to change them. - -* Redirection:: How to redirect output to multiple files and pipes. -* Close Output:: How to close output files and pipes. - -* Printf:: The `printf' statement. - - - -File: gawk-info, Node: Print, Next: Print Examples, Up: Printing - -The `print' Statement -===================== - -The `print' statement does output with simple, standardized -formatting. You specify only the strings or numbers to be printed, -in a list separated by commas. They are output, separated by single -spaces, followed by a newline. The statement looks like this: - - print ITEM1, ITEM2, ... - - The entire list of items may optionally be enclosed in parentheses. 
-The parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a -redirection (*note Redirection::.). The relational operators are -`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison -Ops::.). - -The items printed can be constant strings or numbers, fields of the -current record (such as `$1'), variables, or any `awk' expressions. -The `print' statement is completely general for computing *what* -values to print. With one exception (*note Output Separators::.), -what you can't do is specify *how* to print them--how many columns to -use, whether to use exponential notation or not, and so on. For -that, you need the `printf' statement (*note Printf::.). - -To print a fixed piece of text, write a string constant as one item, -such as `"Hello there"'. If you forget to use the double--quote -characters, your text will be taken as an `awk' expression, and you -will probably get an error. Keep in mind that a space will be -printed between any two items. - -The simple statement `print' with no items is equivalent to `print -$0': it prints the entire current record. To print a blank line, use -`print ""', where `""' is the null, or empty, string. - -Most often, each `print' statement makes one line of output. But it -isn't limited to one line. If an item value is a string that -contains a newline, the newline is output along with the rest of the -string. A single `print' can make any number of lines this way. - - - -File: gawk-info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing - -Examples of `print' Statements -============================== - -Here is an example that prints the first two fields of each input -record, with a space between them: - - awk '{ print $1, $2 }' inventory-shipped - -Its output looks like this: - - Jan 13 - Feb 15 - Mar 15 - ... - - A common mistake in using the `print' statement is to omit the comma -between two items. 
This often has the effect of making the items run -together in the output, with no space. The reason for this is that -juxtaposing two string expressions in `awk' means to concatenate -them. For example, without the comma: - - awk '{ print $1 $2 }' inventory-shipped - -prints: - - Jan13 - Feb15 - Mar15 - ... - - Neither example's output makes much sense to someone unfamiliar with -the file `inventory-shipped'. A heading line at the beginning would -make it clearer. Let's add some headings to our table of months -(`$1') and green crates shipped (`$2'). We do this using the BEGIN -pattern (*note BEGIN/END::.) to cause the headings to be printed only -once: - - awk 'BEGIN { print "Month Crates" - print "---- -----" } - { print $1, $2 }' inventory-shipped - -Did you already guess what will happen? This program prints the -following: - - Month Crates - ---- ----- - Jan 13 - Feb 15 - Mar 15 - ... - - The headings and the table data don't line up! We can fix this by -printing some spaces between the two fields: - - awk 'BEGIN { print "Month Crates" - print "---- -----" } - { print $1, " ", $2 }' inventory-shipped - -You can imagine that this way of lining up columns can get pretty -complicated when you have many columns to fix. Counting spaces for -two or three columns can be simple, but more than this and you can -get ``lost'' quite easily. This is why the `printf' statement was -created (*note Printf::.); one of its specialties is lining up -columns of data. - - - -File: gawk-info, Node: Output Separators, Next: Redirection, Prev: Print Examples, Up: Printing - -Output Separators -================= - -As mentioned previously, a `print' statement contains a list of -items, separated by commas. In the output, the items are normally -separated by single spaces. But they do not have to be spaces; a -single space is only the default. You can specify any string of -characters to use as the "output field separator", by setting the -special variable `OFS'. 
The initial value of this variable is the -string `" "'. - -The output from an entire `print' statement is called an "output -record". Each `print' statement outputs one output record and then -outputs a string called the "output record separator". The special -variable `ORS' specifies this string. The initial value of the -variable is the string `"\n"' containing a newline character; thus, -normally each `print' statement makes a separate line. - -You can change how output fields and records are separated by -assigning new values to the variables `OFS' and/or `ORS'. The usual -place to do this is in the `BEGIN' rule (*note BEGIN/END::.), so that -it happens before any input is processed. You may also do this with -assignments on the command line, before the names of your input files. - -The following example prints the first and second fields of each -input record separated by a semicolon, with a blank line added after -each line: - - awk 'BEGIN { OFS = ";"; ORS = "\n\n" } - { print $1, $2 }' BBS-list - -If the value of `ORS' does not contain a newline, all your output -will be run together on a single line, unless you output newlines -some other way. - - - -File: gawk-info, Node: Redirection, Next: Printf, Prev: Output Separators, Up: Printing - -Redirecting Output of `print' and `printf' -========================================== - -So far we have been dealing only with output that prints to the -standard output, usually your terminal. Both `print' and `printf' -can be told to send their output to other places. This is called -"redirection". - -A redirection appears after the `print' or `printf' statement. -Redirections in `awk' are written just like redirections in shell -commands, except that they are written inside the `awk' program. - -Here are the three forms of output redirection. They are all shown -for the `print' statement, but they work for `printf' also. 
- -`print ITEMS > OUTPUT-FILE' - This type of redirection prints the items onto the output file - OUTPUT-FILE. The file name OUTPUT-FILE can be any expression. - Its value is changed to a string and then used as a filename - (*note Expressions::.). - - When this type of redirection is used, the OUTPUT-FILE is erased - before the first output is written to it. Subsequent writes do - not erase OUTPUT-FILE, but append to it. If OUTPUT-FILE does - not exist, then it is created. - - For example, here is how one `awk' program can write a list of - BBS names to a file `name-list' and a list of phone numbers to a - file `phone-list'. Each output file contains one name or number - per line. - - awk '{ print $2 > "phone-list" - print $1 > "name-list" }' BBS-list - -`print ITEMS >> OUTPUT-FILE' - This type of redirection prints the items onto the output file - OUTPUT-FILE. The difference between this and the single--`>' - redirection is that the old contents (if any) of OUTPUT-FILE are - not erased. Instead, the `awk' output is appended to the file. - -`print ITEMS | COMMAND' - It is also possible to send output through a "pipe" instead of - into a file. This type of redirection opens a pipe to COMMAND - and writes the values of ITEMS through this pipe, to another - process created to execute COMMAND. - - The redirection argument COMMAND is actually an `awk' - expression. Its value is converted to a string, whose contents - give the shell command to be run. - - For example, this produces two files, one unsorted list of BBS - names and one list sorted in reverse alphabetical order: - - awk '{ print $1 > "names.unsorted" - print $1 | "sort -r > names.sorted" }' BBS-list - - Here the unsorted list is written with an ordinary redirection - while the sorted list is written by piping through the `sort' - utility. - - Here is an example that uses redirection to mail a message to a - mailing list `bug-system'. 
This might be useful when trouble is - encountered in an `awk' script run periodically for system - maintenance. - - print "Awk script failed:", $0 | "mail bug-system" - print "processing record number", FNR, "of", FILENAME | "mail bug-system" - close ("mail bug-system") - - We use a `close' statement here because it's a good idea to - close the pipe as soon as all the intended output has been sent - to it. *Note Close Output::, for more information on this. - -Redirecting output using `>', `>>', or `|' asks the system to open a -file or pipe only if the particular FILE or COMMAND you've specified -has not already been written to by your program. - - - -File: gawk-info, Node: Close Output, Up: Redirection - -Closing Output Files and Pipes ------------------------------- - -When a file or pipe is opened, the filename or command associated -with it is remembered by `awk' and subsequent writes to the same file -or command are appended to the previous writes. The file or pipe -stays open until `awk' exits. This is usually convenient. - -Sometimes there is a reason to close an output file or pipe earlier -than that. To do this, use the `close' command, as follows: - - close (FILENAME) - -or - - close (COMMAND) - -The argument FILENAME or COMMAND can be any expression. Its value -must exactly equal the string used to open the file or pipe to begin -with--for example, if you open a pipe with this: - - print $1 | "sort -r > names.sorted" - -then you must close it with this: - - close ("sort -r > names.sorted") - -Here are some reasons why you might need to close an output file: - - * To write a file and read it back later on in the same `awk' - program. Close the file when you are finished writing it; then - you can start reading it with `getline' (*note Getline::.). - - * To write numerous files, successively, in the same `awk' - program. If you don't close the files, eventually you will - exceed the system limit on the number of open files in one - process. 
So close each one when you are finished writing it. - - * To make a command finish. When you redirect output through a - pipe, the command reading the pipe normally continues to try to - read input as long as the pipe is open. Often this means the - command cannot really do its work until the pipe is closed. For - example, if you redirect output to the `mail' program, the - message will not actually be sent until the pipe is closed. - - * To run the same subprogram a second time, with the same arguments. - This is not the same thing as giving more input to the first run! - - For example, suppose you pipe output to the `mail' program. If - you output several lines redirected to this pipe without closing - it, they make a single message of several lines. By contrast, - if you close the pipe after each line of output, then each line - makes a separate message. - - - -File: gawk-info, Node: Printf, Prev: Redirection, Up: Printing - -Using `printf' Statements For Fancier Printing -============================================== - -If you want more precise control over the output format than `print' -gives you, use `printf'. With `printf' you can specify the width to -use for each item, and you can specify various stylistic choices for -numbers (such as what radix to use, whether to print an exponent, -whether to print a sign, and how many digits to print after the -decimal point). You do this by specifying a "format string". - -* Menu: - -* Basic Printf:: Syntax of the `printf' statement. -* Format-Control:: Format-control letters. -* Modifiers:: Format--specification modifiers. -* Printf Examples:: Several examples. - - - -File: gawk-info, Node: Basic Printf, Next: Format-Control, Up: Printf - -Introduction to the `printf' Statement --------------------------------------- - -The `printf' statement looks like this: - - printf FORMAT, ITEM1, ITEM2, ... - - The entire list of items may optionally be enclosed in parentheses. 
-The parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a -redirection (*note Redirection::.). The relational operators are -`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison -Ops::.). - -The difference between `printf' and `print' is the argument FORMAT. -This is an expression whose value is taken as a string; its job is to -say how to output each of the other arguments. It is called the -"format string". - -The format string is essentially the same as in the C library -function `printf'. Most of FORMAT is text to be output verbatim. -Scattered among this text are "format specifiers", one per item. -Each format specifier says to output the next item at that place in -the format. - -The `printf' statement does not automatically append a newline to its -output. It outputs nothing but what the format specifies. So if you -want a newline, you must include one in the format. The output -separator variables `OFS' and `ORS' have no effect on `printf' -statements. - - - -File: gawk-info, Node: Format-Control, Next: Modifiers, Prev: Basic Printf, Up: Printf - -Format--Control Characters --------------------------- - -A format specifier starts with the character `%' and ends with a -"format--control letter"; it tells the `printf' statement how to -output one item. (If you actually want to output a `%', write `%%'.) -The format--control letter specifies what kind of value to print. -The rest of the format specifier is made up of optional "modifiers" -which are parameters such as the field width to use. - -Here is a list of them: - -`c' - This prints a number as an ASCII character. Thus, `printf "%c", - 65' outputs the letter `A'. The output for a string value is - the first character of the string. - -`d' - This prints a decimal integer. - -`e' - This prints a number in scientific (exponential) notation. 
For - example, - - printf "%4.3e", 1950 - - prints `1.950e+03', with a total of 4 significant figures of - which 3 follow the decimal point. The `4.3' are "modifiers", - discussed below. - -`f' - This prints a number in floating point notation. - -`g' - This prints either scientific notation or floating point - notation, whichever is shorter. - -`o' - This prints an unsigned octal integer. - -`s' - This prints a string. - -`x' - This prints an unsigned hexadecimal integer. - -`%' - This isn't really a format--control letter, but it does have a - meaning when used after a `%': the sequence `%%' outputs one - `%'. It does not consume an argument. - - - -File: gawk-info, Node: Modifiers, Next: Printf Examples, Prev: Format-Control, Up: Printf - -Modifiers for `printf' Formats ------------------------------- - -A format specification can also include "modifiers" that can control -how much of the item's value is printed and how much space it gets. -The modifiers come between the `%' and the format--control letter. -Here are the possible modifiers, in the order in which they may appear: - -`-' - The minus sign, used before the width modifier, says to - left--justify the argument within its specified width. Normally - the argument is printed right--justified in the specified width. - -`WIDTH' - This is a number representing the desired width of a field. - Inserting any number between the `%' sign and the format control - character forces the field to be expanded to this width. The - default way to do this is to pad with spaces on the left. - -`.PREC' - This is a number that specifies the precision to use when - printing. This specifies the number of digits you want printed - to the right of the decimal place. - -The C library `printf''s dynamic WIDTH and PREC capability (for -example, `"%*.*s"') is not supported. However, it can be easily -simulated using concatenation to dynamically build the format string. 
- - - -File: gawk-info, Node: Printf Examples, Prev: Modifiers, Up: Printf - -Examples of Using `printf' --------------------------- - -Here is how to use `printf' to make an aligned table: - - awk '{ printf "%-10s %s\n", $1, $2 }' BBS-list - -prints the names of bulletin boards (`$1') of the file `BBS-list' as -a string of 10 characters, left justified. It also prints the phone -numbers (`$2') afterward on the line. This will produce an aligned -two--column table of names and phone numbers, like so: - - aardvark 555-5553 - alpo-net 555-3412 - barfly 555-7685 - bites 555-1675 - camelot 555-0542 - core 555-2912 - fooey 555-1234 - foot 555-6699 - macfoo 555-6480 - sdace 555-3430 - sabafoo 555-2127 - -Did you notice that we did not specify that the phone numbers be -printed as numbers? They had to be printed as strings because the -numbers are separated by a dash. This dash would be interpreted as a -"minus" sign if we had tried to print the phone numbers as numbers. -This would have led to some pretty confusing results. - -We did not specify a width for the phone numbers because they are the -last things on their lines. We don't need to put spaces after them. - -We could make our table look even nicer by adding headings to the -tops of the columns. To do this, use the BEGIN pattern (*note -BEGIN/END::.) to cause the header to be printed only once, at the -beginning of the `awk' program: - - awk 'BEGIN { print "Name Number" - print "--- -----" } - { printf "%-10s %s\n", $1, $2 }' BBS-list - -Did you notice that we mixed `print' and `printf' statements in the -above example? We could have used just `printf' statements to get -the same results: - - awk 'BEGIN { printf "%-10s %s\n", "Name", "Number" - printf "%-10s %s\n", "---", "-----" } - { printf "%-10s %s\n", $1, $2 }' BBS-list - -By outputting each column heading with the same format specification -used for the elements of the column, we have made sure that the -headings will be aligned just like the columns. 
- -The fact that the same format specification is used can be emphasized -by storing it in a variable, like so: - - awk 'BEGIN { format = "%-10s %s\n" - printf format, "Name", "Number" - printf format, "---", "-----" } - { printf format, $1, $2 }' BBS-list - -See if you can use the `printf' statement to line up the headings and -table data for our `inventory-shipped' example covered earlier in the -section on the `print' statement (*note Print::.). - - - -File: gawk-info, Node: One-liners, Next: Patterns, Prev: Printing, Up: Top - -Useful ``One-liners'' -********************* - -Useful `awk' programs are often short, just a line or two. Here is a -collection of useful, short programs to get you started. Some of -these programs contain constructs that haven't been covered yet. The -description of the program will give you a good idea of what is going -on, but please read the rest of the manual to become an `awk' expert! - -`awk '{ num_fields = num_fields + NF }' -`` END { print num_fields }''' - This program prints the total number of fields in all input lines. - -`awk 'length($0) > 80'' - This program prints every line longer than 80 characters. The - sole rule has a relational expression as its pattern, and has no - action (so the default action, printing the record, is used). - -`awk 'NF > 0'' - This program prints every line that has at least one field. - This is an easy way to delete blank lines from a file (or - rather, to create a new file similar to the old file but from - which the blank lines have been deleted). - -`awk '{ if (NF > 0) print }'' - This program also prints every line that has at least one field. - Here we allow the rule to match every line, then decide in the - action whether to print. - -`awk 'BEGIN { for (i = 1; i <= 7; i++)' -`` print int(101 * rand()) }''' - This program prints 7 random numbers from 0 to 100, inclusive. 
- -`ls -l FILES | awk '{ x += $4 } ; END { print "total bytes: " x }'' - This program prints the total number of bytes used by FILES. - -`expand FILE | awk '{ if (x < length()) x = length() }' -`` END { print "maximum line length is " x }''' - This program prints the maximum line length of FILE. The input - is piped through the `expand' program to change tabs into - spaces, so the widths compared are actually the right--margin - columns. - - - -File: gawk-info, Node: Patterns, Next: Actions, Prev: One-liners, Up: Top - -Patterns -******** - -Patterns control the execution of rules: a rule is executed when its -pattern matches the input record. The `awk' language provides -several special patterns that are described in the sections that -follow. Patterns include: - -NULL - The empty pattern, which matches every input record. (*Note The - Empty Pattern: Empty.) - -/REGULAR EXPRESSION/ - A regular expression as a pattern. It matches when the text of - the input record fits the regular expression. (*Note Regular - Expressions as Patterns: Regexp.) - -CONDEXP - A single comparison expression. It matches when it is true. - (*Note Comparison Expressions as Patterns: Comparison Patterns.) - -`BEGIN' -`END' - Special patterns to supply start--up or clean--up information to - `awk'. (*Note `BEGIN' and `END' Special Patterns: BEGIN/END.) - -PAT1, PAT2 - A pair of patterns separated by a comma, specifying a range of - records. (*Note Specifying Record Ranges With Patterns: Ranges.) - -CONDEXP1 BOOLEAN CONDEXP2 - A "compound" pattern, which combines expressions with the - operators `and', `&&', and `or', `||'. (*Note Boolean - Operators and Patterns: Boolean.) - -! CONDEXP - The pattern CONDEXP is evaluated. Then the `!' performs a - boolean ``not'' or logical negation operation; if the input line - matches the pattern in CONDEXP then the associated action is - *not* executed. If the input line did not match that pattern, - then the action *is* executed. 
(*Note Boolean Operators and - Patterns: Boolean.) - -(EXPR) - Parentheses may be used to control how operators nest. - -PAT1 ? PAT2 : PAT3 - The first pattern is evaluated. If it is true, the input line - is tested against the second pattern, otherwise it is tested - against the third. (*Note Conditional Patterns: Conditional - Patterns.) - -* Menu: - -The following subsections describe these forms in detail: - -* Empty:: The empty pattern, which matches every record. - -* Regexp:: Regular expressions such as `/foo/'. - -* Comparison Patterns:: Comparison expressions such as `$1 > 10'. - -* Boolean:: Combining comparison expressions. - -* Ranges:: Using pairs of patterns to specify record ranges. - -* BEGIN/END:: Specifying initialization and cleanup rules. - -* Conditional Patterns:: Patterns such as `pat1 ? pat2 : pat3'. - - - -File: gawk-info, Node: Empty, Next: Regexp, Up: Patterns - -The Empty Pattern -================= - -An empty pattern is considered to match *every* input record. For -example, the program: - - awk '{ print $1 }' BBS-list - -prints just the first field of every record. - - - -File: gawk-info, Node: Regexp, Next: Comparison Patterns, Prev: Empty, Up: Patterns - -Regular Expressions as Patterns -=============================== - -A "regular expression", or "regexp", is a way of describing classes -of strings. When enclosed in slashes (`/'), it makes an `awk' -pattern that matches every input record that contains a match for the -regexp. - -The simplest regular expression is a sequence of letters, numbers, or -both. Such a regexp matches any string that contains that sequence. -Thus, the regexp `foo' matches any string containing `foo'. (More -complicated regexps let you specify classes of similar strings.) - -* Menu: - -* Usage: Regexp Usage. How regexps are used in patterns. -* Operators: Regexp Operators. How to write a regexp. 
- - - -File: gawk-info, Node: Regexp Usage, Next: Regexp Operators, Up: Regexp - -How to use Regular Expressions ------------------------------- - -When you enclose `foo' in slashes, you get a pattern that matches a -record that contains `foo'. For example, this prints the second -field of each record that contains `foo' anywhere: - - awk '/foo/ { print $2 }' BBS-list - -Regular expressions can also be used in comparison expressions. Then -you can specify the string to match against; it need not be the -entire current input record. These comparison expressions can be -used as patterns or in `if' and `while' statements. - -`EXP ~ /REGEXP/' - This is true if the expression EXP (taken as a character string) - is matched by REGEXP. The following example matches, or - selects, all input records with the letter `J' in the first field: - - awk '$1 ~ /J/' inventory-shipped - - So does this: - - awk '{ if ($1 ~ /J/) print }' inventory-shipped - -`EXP !~ /REGEXP/' - This is true if the expression EXP (taken as a character string) - is *not* matched by REGEXP. The following example matches, or - selects, all input records whose first field *does not* contain - the letter `J': - - awk '$1 !~ /J/' inventory-shipped - -The right hand side of a `~' or `!~' operator need not be a constant -regexp (i.e. a string of characters between `/'s). It can also be -"computed", or "dynamic". For example: - - identifier = "[A-Za-z_][A-Za-z_0-9]+" - $0 ~ identifier - -sets `identifier' to a regexp that describes `awk' variable names, -and tests if the input record matches this regexp. - -A dynamic regexp may actually be any expression. The expression is -evaluated, and the result is treated as a string that describes a -regular expression. 
- - - -File: gawk-info, Node: Regexp Operators, Prev: Regexp Usage, Up: Regexp - -Regular Expression Operators ----------------------------- - -You can combine regular expressions with the following characters, -called "regular expression operators", or "metacharacters", to -increase the power and versatility of regular expressions. This is a -table of metacharacters: - -`\' - This is used to suppress the special meaning of a character when - matching. For example: - - \$ - - matches the character `$'. - -`^' - This matches the beginning of the string or the beginning of a - line within the string. For example: - - ^@chapter - - matches the `@chapter' at the beginning of a string, and can be - used to identify chapter beginnings in Texinfo source files. - -`$' - This is similar to `^', but it matches only at the end of a - string or the end of a line within the string. For example: - - /p$/ - - as a pattern matches a record that ends with a `p'. - -`.' - This matches any single character except a newline. For example: - - .P - - matches any single character followed by a `P' in a string. - Using concatenation we can make regular expressions like `U.A', - which matches any three--character string that begins with `U' - and ends with `A'. - -`[...]' - This is called a "character set". It matches any one of a group - of characters that are enclosed in the square brackets. For - example: - - [MVX] - - matches any of the characters `M', `V', or `X' in a string. - - Ranges of characters are indicated by using a hyphen between the - beginning and ending characters, and enclosing the whole thing - in brackets. For example: - - [0-9] - - matches any string that contains a digit. - - Note that special patterns have to be followed to match the - characters, `]', `-', and `^' when they are enclosed in the - square brackets. To match a `]', make it the first character in - the set. For example: - - []d] - - matches either `]', or `d'. 
- - To match `-', write it as `---', which is a range containing only - `-'. You may also make the `-' be the first or last character - in the set. To match `^', make it any character except the - first one of a set. - -`[^ ...]' - This is the "complemented character set". The first character - after the `[' *must* be a `^'. This matches any characters - *except* those in the square brackets. For example: - - [^0-9] - - matches any characters that are not digits. - -`|' - This is the "alternation operator" and it is used to specify - alternatives. For example: - - ^P|[0-9] - - matches any string that matches either `^P' or `[0-9]'. This - means it matches any string that contains a digit or starts with - `P'. - -`(...)' - Parentheses are used for grouping in regular expressions as in - arithmetic. They can be used to concatenate regular expressions - containing the alternation operator, `|'. - -`*' - This symbol means that the preceding regular expression is to be - repeated as many times as possible to find a match. For example: - - ph* - - applies the `*' symbol to the preceding `h' and looks for - matches to one `p' followed by any number of `h''s. This will - also match just `p' if no `h''s are present. - - The `*' means repeat the *smallest* possible preceding - expression in order to find a match. The `awk' language - processes a `*' by matching as many repetitions as can be found. - For example: - - awk '/\(c[ad][ad]*r x\)/ { print }' sample - - matches every record in the input containing a string of the - form `(car x)', `(cdr x)', `(cadr x)', and so on. - -`+' - This symbol is similar to `*', but the preceding expression must - be matched at least once. This means that: - - wh+y - - would match `why' and `whhy' but not `wy', whereas `wh*y' would - match all three of these strings. And this is a simpler way of - writing the last `*' example: - - awk '/\(c[ad]+r x\)/ { print }' sample - -`?' 
- This symbol is similar to `*', but the preceding expression can - be matched once or not at all. For example: - - fe?d - - will match `fed' or `fd', but nothing else. - -In regular expressions, the `*', `+', and `?' operators have the -highest precedence, followed by concatenation, and finally by `|'. -As in arithmetic, parentheses can change how operators are grouped. - -Any other character stands for itself. However, it is important to -note that case in regular expressions *is* significant, both when -matching ordinary (i.e. non--metacharacter) characters, and inside -character sets. Thus a `w' in a regular expression matches only a -lowercase `w' and never an uppercase `W'. When -you want to do a case--independent match, you have to use a character -set: `[Ww]'. - - - -File: gawk-info, Node: Comparison Patterns, Next: Ranges, Prev: Regexp, Up: Patterns - -Comparison Expressions as Patterns -================================== - -"Comparison patterns" use "relational operators" to compare strings -or numbers. The relational operators are the same as in C. Here is -a table of them: - -`X < Y' - True if X is less than Y. - -`X <= Y' - True if X is less than or equal to Y. - -`X > Y' - True if X is greater than Y. - -`X >= Y' - True if X is greater than or equal to Y. - -`X == Y' - True if X is equal to Y. - -`X != Y' - True if X is not equal to Y. - -Comparison expressions can be used as patterns to control whether a -rule is executed. The expression is evaluated for each input record -read, and the pattern is considered matched if the condition is "true". - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (*note Conversion::.). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, `"10"' is less than `"9"'. 
- -The following example prints the second field of each input record -whose first field is precisely `foo'. - - awk '$1 == "foo" { print $2 }' BBS-list - -Contrast this with the following regular expression match, which -would accept any record with a first field that contains `foo': - - awk '$1 ~ "foo" { print $2 }' BBS-list - - - -File: gawk-info, Node: Ranges, Next: BEGIN/END, Prev: Comparison Patterns, Up: Patterns - -Specifying Record Ranges With Patterns -====================================== - -A "range pattern" is made of two patterns separated by a comma: -`BEGPAT, ENDPAT'. It matches ranges of consecutive input records. -The first pattern BEGPAT controls where the range begins, and the -second one ENDPAT controls where it ends. - -They work as follows: BEGPAT is matched against every input record; -when a record matches BEGPAT, the range pattern becomes "turned on". -The range pattern matches this record. As long as it stays turned -on, it automatically matches every input record read. But meanwhile, -ENDPAT is matched against every input record, and when it matches, -the range pattern is turned off again for the following record. Now -we go back to checking BEGPAT against each record. For example: - - awk '$1 == "on", $1 == "off"' - -prints every record between on/off pairs, inclusive. - -The record that turns on the range pattern and the one that turns it -off both match the range pattern. If you don't want to operate on -these records, you can write `if' statements in the rule's action to -distinguish them. - -It is possible for a pattern to be turned both on and off by the same -record, if both conditions are satisfied by that record. Then the -action is executed for just that record. - - - -File: gawk-info, Node: BEGIN/END, Next: Boolean, Prev: Ranges, Up: Patterns - -`BEGIN' and `END' Special Patterns -================================== - -`BEGIN' and `END' are special patterns. They are not used to match -input records. 
Rather, they are used for supplying start--up or -clean--up information to your `awk' script. A `BEGIN' rule is -executed, once, before the first input record has been read. An -`END' rule is executed, once, after all the input has been read. For -example: - - awk 'BEGIN { print "Analysis of ``foo'' program" } - /foo/ { ++foobar } - END { print "``foo'' appears " foobar " times." }' BBS-list - -This program finds out how many times the string `foo' appears in the -input file `BBS-list'. The `BEGIN' pattern prints out a title for -the report. There is no need to use the `BEGIN' pattern to -initialize the counter `foobar' to zero, as `awk' does this for us -automatically (*note Variables::.). The second rule increments the -variable `foobar' every time a record containing the pattern `foo' is -read. The last rule prints out the value of `foobar' at the end of -the run. - -The special patterns `BEGIN' and `END' do not combine with other -kinds of patterns. - -An `awk' program may have multiple `BEGIN' and/or `END' rules. The -contents of multiple `BEGIN' or `END' rules are treated as if they -had been enclosed in a single rule, in the order that the rules are -encountered in the `awk' program. (This feature was introduced with -the new version of `awk'.) - -Multiple `BEGIN' and `END' sections are also useful for writing -library functions that need to do initialization and/or cleanup of -their own. Note that the order in which library functions are named -on the command line will affect the order in which their `BEGIN' and -`END' rules will be executed. Therefore you have to be careful how -you write your library functions. (*Note Command Line::, for more -information on using library functions.) - -If an `awk' program only has a `BEGIN' rule, and no other rules, then -the program will exit after the `BEGIN' rule has been run. Older -versions of `awk' used to read their input until end of file was -seen. 
However, if an `END' rule exists as well, then the input will -be read, even if there are no other rules in the program. - -`BEGIN' and `END' rules must have actions; there is no default action -for these rules since there is no current record when they run. - - - -File: gawk-info, Node: Boolean, Next: Conditional Patterns, Prev: BEGIN/END, Up: Patterns - -Boolean Operators and Patterns -============================== - -A boolean pattern is a combination of other patterns using the -boolean operators ``or'' (`||'), ``and'' (`&&'), and ``not'' (`!'), -along with parentheses to control nesting. Whether the boolean -pattern matches an input record is computed from whether its -subpatterns match. - -The subpatterns of a boolean pattern can be regular expressions, -matching expressions, comparisons, or other boolean combinations of -such. Range patterns cannot appear inside boolean operators, since -they don't make sense for classifying a single record, and neither -can the special patterns `BEGIN' and `END', which never match any -input record. - -Here are descriptions of the three boolean operators. - -`PAT1 && PAT2' - Matches if both PAT1 and PAT2 match by themselves. For example, - the following command prints all records in the input file - `BBS-list' that contain both `2400' and `foo'. - - awk '/2400/ && /foo/' BBS-list - - Whether PAT2 matches is tested only if PAT1 succeeds. This can - make a difference when PAT2 contains expressions that have side - effects: in the case of `/foo/ && ($2 == bar++)', the variable - `bar' is not incremented if there is no `foo' in the record. - -`PAT1 || PAT2' - Matches if at least one of PAT1 and PAT2 matches the current - input record. For example, the following command prints all - records in the input file `BBS-list' that contain *either* - `2400' or `foo', or both. - - awk '/2400/ || /foo/' BBS-list - - Whether PAT2 matches is tested only if PAT1 fails to match. 
- This can make a difference when PAT2 contains expressions that - have side effects. - -`!PAT' - Matches if PAT does not match. For example, the following - command prints all records in the input file `BBS-list' that do - *not* contain the string `foo'. - - awk '! /foo/' BBS-list - -Note that boolean patterns are built from other patterns just as -boolean expressions are built from other expressions (*note Boolean -Ops::.). Any boolean expression is also a valid boolean pattern. -But the converse is not true: simple regular expression patterns such -as `/foo/' are not allowed in boolean expressions. Regular -expressions can appear in boolean expressions only in conjunction -with the matching operators, `~' and `!~'. - - - -File: gawk-info, Node: Conditional Patterns, Prev: Boolean, Up: Patterns - -Conditional Patterns -==================== - -Patterns may use a "conditional expression" much like the conditional -expression of the C language. This takes the form: - - PAT1 ? PAT2 : PAT3 - -The first pattern is evaluated. If it evaluates to TRUE, then the -input record is tested against PAT2. Otherwise it is tested against -PAT3. The conditional pattern matches if PAT2 or PAT3 (whichever one -is selected) matches. - - - -File: gawk-info, Node: Actions, Next: Expressions, Prev: Patterns, Up: Top - -Actions: The Basics -******************* - -The "action" part of an `awk' rule tells `awk' what to do once a -match for the pattern is found. An action consists of one or more -`awk' "statements", enclosed in curly braces (`{' and `}'). The -curly braces must be used even if the action contains only one -statement, or even if it contains no statements at all. Action -statements are separated by newlines or semicolons. - -Besides the print statements already covered (*note Printing::.), -there are four kinds of action statements: expressions, control -statements, compound statements, and function definitions. 
- - * "Expressions" include assignments, arithmetic, function calls, - and more (*note Expressions::.). - - * "Control statements" specify the control flow of `awk' programs. - The `awk' language gives you C--like constructs (`if', `for', - `while', and so on) as well as a few special ones (*note - Statements::.). - - * A "compound statement" is just one or more `awk' statements - enclosed in curly braces. This way you can group several - statements to form the body of an `if' or similar statement. - - * You can define "user--defined functions" for use elsewhere in - the `awk' program (*note User-defined::.). - - - -File: gawk-info, Node: Expressions, Next: Statements, Prev: Actions, Up: Top - -Actions: Expressions -******************** - -Expressions are the basic building block of `awk' actions. An -expression evaluates to a value, which you can print, test, store in -a variable or pass to a function. - -But, beyond that, an expression can assign a new value to a variable -or a field, with an assignment operator. - -An expression can serve as a statement on its own. Most other action -statements are made up of various combinations of expressions. As in -other languages, expressions in `awk' include variables, array -references, constants, and function calls, as well as combinations of -these with various operators. - -* Menu: - -* Constants:: String and numeric constants. -* Variables:: Variables give names to values for future use. -* Fields:: Field references such as `$1' are also expressions. -* Arrays:: Array element references are expressions. - -* Arithmetic Ops:: Arithmetic operations (`+', `-', etc.) -* Concatenation:: Concatenating strings. -* Comparison Ops:: Comparison of numbers and strings with `<', etc. -* Boolean Ops:: Combining comparison expressions using boolean operators - `||' (``or''), `&&' (``and'') and `!' (``not''). - -* Assignment Ops:: Changing the value of a variable or a field. 
-* Increment Ops:: Incrementing the numeric value of a variable. - -* Conversion:: The conversion of strings to numbers and vice versa. -* Conditional Exp:: Conditional expressions select between two subexpressions - under control of a third subexpression. -* Function Calls:: A function call is an expression. - - - -File: gawk-info, Node: Constants, Next: Variables, Up: Expressions - -Constant Expressions -==================== - -There are two types of constants: numeric constants and string -constants. - -The "numeric constant" is a number. This number can be an integer, a -decimal fraction, or a number in scientific (exponential) notation. -Note that all numeric values are represented within `awk' in -double--precision floating point. Here are some examples of numeric -constants, which all have the same value: - - 105 - 1.05e+2 - 1050e-1 - -A string constant consists of a sequence of characters enclosed in -double--quote marks. For example: - - "parrot" - -represents the string constant `parrot'. Strings in `gawk' can be of -any length and they can contain all the possible 8--bit ASCII -characters including ASCII NUL. Other `awk' implementations may have -difficulty with some character codes. - -Some characters cannot be included literally in a string. You -represent them instead with "escape sequences", which are character -sequences beginning with a backslash (`\'). - -One use of the backslash is to include double--quote characters in a -string. Since a plain double--quote would end the string, you must -use `\"'. Backslash itself is another character that can't be -included normally; you write `\\' to put one backslash in the string. - -Another use of backslash is to represent unprintable characters such -as newline. While there is nothing to stop you from writing these -characters directly in an `awk' program, they may look ugly. - -`\b' - Represents a backspace, `^H'. - -`\f' - Represents a formfeed, `^L'. - -`\n' - Represents a newline, `^J'. 
- -`\r' - Represents a carriage return, `^M'. - -`\t' - Represents a horizontal tab, `^I'. - -`\v' - Represents a vertical tab, `^K'. - -`\NNN' - Represents the octal value NNN, where NNN is one to three digits - between 0 and 7. For example, the code for the ASCII ESC - (escape) character is `\033'. - - - -File: gawk-info, Node: Variables, Next: Arithmetic Ops, Prev: Constants, Up: Expressions - -Variables -========= - -Variables let you give names to values and refer to them later. You -have already seen variables in many of the examples. The name of a -variable must be a sequence of letters, digits and underscores, but -it may not begin with a digit. Case is significant in variable -names; `a' and `A' are distinct variables. - -A variable name is a valid expression by itself; it represents the -variable's current value. Variables are given new values with -"assignment operators" and "increment operators". *Note Assignment -Ops::. - -A few variables have special built--in meanings, such as `FS', the -field separator, and `NF', the number of fields in the current input -record. *Note Special::, for a list of them. Special variables can -be used and assigned just like all other variables, but their values -are also used or changed automatically by `awk'. Each special -variable's name is made entirely of upper case letters. - -Variables in `awk' can be assigned either numeric values or string -values. By default, variables are initialized to the null string, -which has the numeric value zero. So there is no need to -``initialize'' each variable explicitly in `awk', the way you would -need to do in C or most other traditional programming languages. - - - -File: gawk-info, Node: Arithmetic Ops, Next: Concatenation, Prev: Variables, Up: Expressions - -Arithmetic Operators -==================== - -The `awk' language uses the common arithmetic operators when -evaluating expressions. 
All of these arithmetic operators follow -normal precedence rules, and work as you would expect them to. This -example divides field 3 by field 4, adds field 2, stores the result -into field 1, and prints the results: - - awk '{ $1 = $2 + $3 / $4; print }' inventory-shipped - -The arithmetic operators in `awk' are: - -`X + Y' - Addition. - -`X - Y' - Subtraction. - -`- X' - Negation. - -`X / Y' - Division. Since all numbers in `awk' are double--precision - floating point, the result is not rounded to an integer: `3 / 4' - has the value 0.75. - -`X * Y' - Multiplication. - -`X % Y' - Remainder. The quotient is rounded toward zero to an integer, - multiplied by Y and this result is subtracted from X. This - operation is sometimes known as ``trunc--mod''. The following - relation always holds: - - `b * int(a / b) + (a % b) == a' - - One undesirable effect of this definition of remainder is that X - % Y is negative if X is negative. Thus, - - -17 % 8 = -1 - -`X ^ Y' -`X ** Y' - Exponentiation: X raised to the Y power. `2 ^ 3' has the value - 8. The character sequence `**' is equivalent to `^'. - - - -File: gawk-info, Node: Concatenation, Next: Comparison Ops, Prev: Arithmetic Ops, Up: Expressions - -String Concatenation -==================== - -There is only one string operation: concatenation. It does not have -a specific operator to represent it. Instead, concatenation is -performed by writing expressions next to one another, with no -operator. For example: - - awk '{ print "Field number one: " $1 }' BBS-list - -produces, for the first record in `BBS-list': - - Field number one: aardvark - -If you hadn't put the space after the `:', the line would have run -together. 
For example: - - awk '{ print "Field number one:" $1 }' BBS-list - -produces, for the first record in `BBS-list': - - Field number one:aardvark - - - -File: gawk-info, Node: Comparison Ops, Next: Boolean Ops, Prev: Concatenation, Up: Expressions - -Comparison Expressions -====================== - -"Comparison expressions" use "relational operators" to compare -strings or numbers. The relational operators are the same as in C. -Here is a table of them: - -`X < Y' - True if X is less than Y. - -`X <= Y' - True if X is less than or equal to Y. - -`X > Y' - True if X is greater than Y. - -`X >= Y' - True if X is greater than or equal to Y. - -`X == Y' - True if X is equal to Y. - -`X != Y' - True if X is not equal to Y. - -`X ~ REGEXP' - True if regexp REGEXP matches the string X. - -`X !~ REGEXP' - True if regexp REGEXP does not match the string X. - -`SUBSCRIPT in ARRAY' - True if array ARRAY has an element with the subscript SUBSCRIPT. - -Comparison expressions have the value 1 if true and 0 if false. - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (*note Conversion::.). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, `"10"' is less than `"9"'. - -For example, - - $1 == "foo" - -has the value of 1, or is true, if the first field of the current -input record is precisely `foo'. By contrast, - - $1 ~ /foo/ - -has the value 1 if the first field contains `foo'. - - - -File: gawk-info, Node: Boolean Ops, Next: Assignment Ops, Prev: Comparison Ops, Up: Expressions - -Boolean Operators -================= - -A boolean expression is a combination of comparison expressions or -matching expressions, using the boolean operators ``or'' (`||'), -``and'' (`&&'), and ``not'' (`!'), along with parentheses to control -nesting. 
The truth of the boolean expression is computed by -combining the truth values of the component expressions. - -Boolean expressions can be used wherever comparison and matching -expressions can be used. They can be used in `if' and `while' -statements. They have numeric values (1 if true, 0 if false). - -In addition, every boolean expression is also a valid boolean -pattern, so you can use it as a pattern to control the execution of -rules. - -Here are descriptions of the three boolean operators, with an example -of each. It may be instructive to compare these examples with the -analogous examples of boolean patterns (*note Boolean::.), which use -the same boolean operators in patterns instead of expressions. - -`BOOLEAN1 && BOOLEAN2' - True if both BOOLEAN1 and BOOLEAN2 are true. For example, the - following statement prints the current input record if it - contains both `2400' and `foo'. - - if ($0 ~ /2400/ && $0 ~ /foo/) print - - The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is - true. This can make a difference when BOOLEAN2 contains - expressions that have side effects: in the case of `$0 ~ /foo/ - && ($2 == bar++)', the variable `bar' is not incremented if - there is no `foo' in the record. - -`BOOLEAN1 || BOOLEAN2' - True if at least one of BOOLEAN1 and BOOLEAN2 is true. For - example, the following command prints all records in the input - file `BBS-list' that contain *either* `2400' or `foo', or both. - - awk '{ if ($0 ~ /2400/ || $0 ~ /foo/) print }' BBS-list - - The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is - false. This can make a difference when BOOLEAN2 contains - expressions that have side effects. - -`!BOOLEAN' - True if BOOLEAN is false. For example, the following program - prints all records in the input file `BBS-list' that do *not* - contain the string `foo'. - - awk '{ if (! 
($0 ~ /foo/)) print }' BBS-list - - - -File: gawk-info, Node: Assignment Ops, Next: Increment Ops, Prev: Boolean Ops, Up: Expressions - -Assignment Operators -==================== - -An "assignment" is an expression that stores a new value into a -variable. For example, let's assign the value 1 to the variable `z': - - z = 1 - -After this expression is executed, the variable `z' has the value 1. -Whatever old value `z' had before the assignment is forgotten. - -The `=' sign is called an "assignment operator". It is the simplest -assignment operator because the value of the right--hand operand is -stored unchanged. - -The left--hand operand of an assignment can be a variable (*note -Variables::.), a field (*note Changing Fields::.) or an array element -(*note Arrays::.). These are all called "lvalues", which means they -can appear on the left side of an assignment operator. The -right--hand operand may be any expression; it produces the new value -which the assignment stores in the specified variable, field or array -element. - -Assignments can store string values also. For example, this would -store the value `"this food is good"' in the variable `message': - - thing = "food" - predicate = "good" - message = "this " thing " is " predicate - -(This also illustrates concatenation of strings.) - -It is important to note that variables do *not* have permanent types. -The type of a variable is simply the type of whatever value it -happens to hold at the moment. In the following program fragment, -the variable `foo' has a numeric value at first, and a string value -later on: - - foo = 1 - print foo - foo = "bar" - print foo - -When the second assignment gives `foo' a string value, the fact that -it previously had a numeric value is forgotten. - -An assignment is an expression, so it has a value: the same value -that is assigned. Thus, `z = 1' as an expression has the value 1. 
-One consequence of this is that you can write multiple assignments -together: - - x = y = z = 0 - -stores the value 0 in all three variables. It does this because the -value of `z = 0', which is 0, is stored into `y', and then the value -of `y = z = 0', which is 0, is stored into `x'. - -You can use an assignment anywhere an expression is called for. For -example, it is valid to write `x != (y = 1)' to set `y' to 1 and then -test whether `x' equals 1. But this style tends to make programs -hard to read; except in a one--shot program, you should rewrite it to -get rid of such nesting of assignments. This is never very hard. - -Aside from `=', there are several other assignment operators that do -arithmetic with the old value of the variable. For example, the -operator `+=' computes a new value by adding the right--hand value to -the old value of the variable. Thus, the following assignment adds 5 -to the value of `foo': - - foo += 5 - -This is precisely equivalent to the following: - - foo = foo + 5 - -Use whichever one makes the meaning of your program clearer. - -Here is a table of the arithmetic assignment operators. In each -case, the right--hand operand is an expression whose value is -converted to a number. - -`LVALUE += INCREMENT' - Adds INCREMENT to the value of LVALUE to make the new value of - LVALUE. - -`LVALUE -= DECREMENT' - Subtracts DECREMENT from the value of LVALUE. - -`LVALUE *= COEFFICIENT' - Multiplies the value of LVALUE by COEFFICIENT. - -`LVALUE /= QUOTIENT' - Divides the value of LVALUE by QUOTIENT. - -`LVALUE %= MODULUS' - Sets LVALUE to its remainder by MODULUS. - -`LVALUE ^= POWER' -`LVALUE **= POWER' - Raises LVALUE to the power POWER. - - - -File: gawk-info, Node: Increment Ops, Next: Conversion, Prev: Assignment Ops, Up: Expressions - -Increment Operators -=================== - -"Increment operators" increase or decrease the value of a variable by -1. 
You could do the same thing with an assignment operator, so the -increment operators add no power to the `awk' language; but they are -convenient abbreviations for something very common. - -The operator to add 1 is written `++'. There are two ways to use -this operator: pre--incrementation and post--incrementation. - -To pre--increment a variable V, write `++V'. This adds 1 to the -value of V and that new value is also the value of this expression. -The assignment expression `V += 1' is completely equivalent. - -Writing the `++' after the variable specifies post--increment. This -increments the variable value just the same; the difference is that -the value of the increment expression itself is the variable's *old* -value. Thus, if `foo' has value 4, then the expression `foo++' has -the value 4, but it changes the value of `foo' to 5. - -The post--increment `foo++' is nearly equivalent to writing `(foo += -1) - 1'. It is not perfectly equivalent because all numbers in `awk' -are floating point: in floating point, `foo + 1 - 1' does not -necessarily equal `foo'. But the difference will be minute as long -as you stick to numbers that are fairly small (less than a trillion). - -Any lvalue can be incremented. Fields and array elements are -incremented just like variables. - -The decrement operator `--' works just like `++' except that it -subtracts 1 instead of adding. Like `++', it can be used before the -lvalue to pre--decrement or after it to post--decrement. - -Here is a summary of increment and decrement expressions. - -`++LVALUE' - This expression increments LVALUE and the new value becomes the - value of this expression. - -`LVALUE++' - This expression causes the contents of LVALUE to be incremented. - The value of the expression is the *old* value of LVALUE. - -`--LVALUE' - Like `++LVALUE', but instead of adding, it subtracts. It - decrements LVALUE and delivers the value that results. - -`LVALUE--' - Like `LVALUE++', but instead of adding, it subtracts. 
It - decrements LVALUE. The value of the expression is the *old* - value of LVALUE. - - - -File: gawk-info, Node: Conversion, Next: Conditional Exp, Prev: Increment Ops, Up: Expressions - -Conversion of Strings and Numbers -================================= - -Strings are converted to numbers, and numbers to strings, if the -context of your `awk' statement demands it. For example, if the -values of `foo' or `bar' in the expression `foo + bar' happen to be -strings, they are converted to numbers before the addition is -performed. If numeric values appear in string concatenation, they -are converted to strings. Consider this: - - two = 2; three = 3 - print (two three) + 4 - -This eventually prints the (numeric) value `27'. The numeric -variables `two' and `three' are converted to strings and concatenated -together, and the resulting string is converted back to a number -before adding `4'. The resulting numeric value `27' is printed. - -If, for some reason, you need to force a number to be converted to a -string, concatenate the null string with that number. To force a -string to be converted to a number, add zero to that string. Strings -that can't be interpreted as valid numbers are given the numeric -value zero. - -The exact manner in which numbers are converted into strings is -controlled by the `awk' special variable `OFMT' (*note Special::.). -Numbers are converted using a special version of the `sprintf' -function (*note Built-in::.) with `OFMT' as the format specifier. - -`OFMT''s default value is `"%.6g"', which prints a value with at -most six significant digits. You might want to change it to specify -more precision, if your version of `awk' uses double precision -arithmetic. Double precision on most modern machines gives you 16 or -17 decimal digits of precision. - -Strange results can happen if you set `OFMT' to a string that doesn't -tell `sprintf' how to format floating point numbers in a useful way. 
-For example, if you forget the `%' in the format, all numbers will be -converted to the same constant string. - - - -File: gawk-info, Node: Conditional Exp, Next: Function Calls, Prev: Conversion, Up: Expressions - -Conditional Expressions -======================= - -A "conditional expression" is a special kind of expression with three -operands. It allows you to use one expression's value to select one -of two other expressions. - -The conditional expression looks the same as in the C language: - - SELECTOR ? IF-TRUE-EXP : IF-FALSE-EXP - -There are three subexpressions. The first, SELECTOR, is always -computed first. If it is ``true'' (not zero) then IF-TRUE-EXP is -computed next and its value becomes the value of the whole expression. -Otherwise, IF-FALSE-EXP is computed next and its value becomes the -value of the whole expression. - -For example, this expression produces the absolute value of `x': - - x > 0 ? x : -x - -Each time the conditional expression is computed, exactly one of -IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This -is important when the expressions contain side effects. For example, -this conditional expression examines element `i' of either array `a' -or array `b', and increments `i'. - - x == y ? a[i++] : b[i++] - -This is guaranteed to increment `i' exactly once, because each time -one or the other of the two increment expressions will be executed -and the other will not be. - - - -File: gawk-info, Node: Function Calls, Prev: Conditional Exp, Up: Expressions - -Function Calls -============== - -A "function" is a name for a particular calculation. Because it has -a name, you can ask for it by name at any point in the program. For -example, the function `sqrt' computes the square root of a number. - -A fixed set of functions are "built in", which means they are -available in every `awk' program. The `sqrt' function is one of -these. *Note Built-in::, for a list of built--in functions and their -descriptions. 
In addition, you can define your own functions in the -program for use elsewhere in the same program. *Note User-defined::, -for how to do this. - -The way to use a function is with a "function call" expression, which -consists of the function name followed by a list of "arguments" in -parentheses. The arguments are expressions which give the raw -materials for the calculation that the function will do. When there -is more than one argument, they are separated by commas. If there -are no arguments, write just `()' after the function name. - -*Do not put any space between the function name and the -open--parenthesis!* A user--defined function name looks just like -the name of a variable, and space would make the expression look like -concatenation of a variable with an expression inside parentheses. -Space before the parenthesis is harmless with built--in functions, -but it is best not to get into the habit of using space, lest you do -likewise for a user--defined function one day by mistake. - -Each function needs a particular number of arguments. For example, -the `sqrt' function must be called with a single argument, like this: - - sqrt(ARGUMENT) - -The argument is the number to take the square root of. - -Some of the built--in functions allow you to omit the final argument. -If you do so, they will use a reasonable default. *Note Built-in::, -for full details. If arguments are omitted in calls to user--defined -functions, then those arguments are treated as local variables, -initialized to the null string (*note User-defined::.). - -Like every other expression, the function call has a value, which is -computed by the function based on the arguments you give it. In this -example, the value of `sqrt(ARGUMENT)' is the square root of the -argument. A function can also have side effects, such as assigning -the values of certain variables or doing I/O. 
- -Here is a command to read numbers, one number per line, and print the -square root of each one: - - awk '{ print "The square root of", $1, "is", sqrt($1) }' - - - -File: gawk-info, Node: Statements, Next: Arrays, Prev: Expressions, Up: Top - -Actions: Statements -******************* - -"Control statements" such as `if', `while', and so on control the -flow of execution in `awk' programs. Most of the control statements -in `awk' are patterned on similar statements in C. - -The simplest kind of statement is an expression. The other kinds of -statements start with special keywords such as `if' and `while', to -distinguish them from simple expressions. - -In all the examples in this chapter, BODY can be either a single -statement or a group of statements. Groups of statements are -enclosed in braces, and separated by newlines or semicolons. - -* Menu: - -* Expressions:: One kind of statement simply computes an expression. - -* If:: Conditionally execute some `awk' statements. - -* While:: Loop until some condition is satisfied. - -* Do:: Do specified action while looping until some - condition is satisfied. - -* For:: Another looping statement, that provides - initialization and increment clauses. - -* Break:: Immediately exit the innermost enclosing loop. - -* Continue:: Skip to the end of the innermost enclosing loop. - -* Next:: Stop processing the current input record. - -* Exit:: Stop execution of `awk'. - - - -File: gawk-info, Node: If, Next: While, Up: Statements - -The `if' Statement -================== - -The `if'-`else' statement is `awk''s decision--making statement. The -`else' part of the statement is optional. - - `if (CONDITION) BODY1 else BODY2' - -Here CONDITION is an expression that controls what the rest of the -statement will do. If CONDITION is true, BODY1 is executed; -otherwise, BODY2 is executed (assuming that the `else' clause is -present). The condition is considered true if it is nonzero or -nonnull. 
- -Here is an example: - - awk '{ if (x % 2 == 0) - print "x is even" - else - print "x is odd" }' - -In this example, if the statement containing `x' is found to be true -(that is, x is divisible by 2), then the first `print' statement is -executed, otherwise the second `print' statement is performed. - -If the `else' appears on the same line as BODY1, and BODY1 is a -single statement, then a semicolon must separate BODY1 from `else'. -To illustrate this, let's rewrite the previous example: - - awk '{ if (x % 2 == 0) print "x is even"; else - print "x is odd" }' - -If you forget the `;', `awk' won't be able to parse it, and you will -get a syntax error. - -We would not actually write this example this way, because a human -reader might fail to see the `else' if it were not the first thing on -its line. - - - -File: gawk-info, Node: While, Next: Do, Prev: If, Up: Statements - -The `while' Statement -===================== - -In programming, a loop means a part of a program that is (or at least -can be) executed two or more times in succession. - -The `while' statement is the simplest looping statement in `awk'. It -repeatedly executes a statement as long as a condition is true. It -looks like this: - - while (CONDITION) - BODY - -Here BODY is a statement that we call the "body" of the loop, and -CONDITION is an expression that controls how long the loop keeps -running. - -The first thing the `while' statement does is test CONDITION. If -CONDITION is true, it executes the statement BODY. After BODY has -been executed, CONDITION is tested again and this process is repeated -until CONDITION is no longer true. If CONDITION is initially false, -the body of the loop is never executed. - - awk '{ i = 1 - while (i <= 3) { - print $i - i++ - } - }' - -This example prints the first three input fields, one per line. - -The loop works like this: first, the value of `i' is set to 1. Then, -the `while' tests whether `i' is less than or equal to three. 
This -is the case when `i' equals one, so the `i'-th field is printed. -Then the `i++' increments the value of `i' and the loop repeats. - -When `i' reaches 4, the loop exits. Here BODY is a compound -statement enclosed in braces. As you can see, a newline is not -required between the condition and the body; but using one makes the -program clearer unless the body is a compound statement or is very -simple. - - - -File: gawk-info, Node: Do, Next: For, Prev: While, Up: Statements - -The `do'--`while' Statement -=========================== - -The `do' loop is a variation of the `while' looping statement. The -`do' loop executes the BODY once, then repeats BODY as long as -CONDITION is true. It looks like this: - - do - BODY - while (CONDITION) - -Even if CONDITION is false at the start, BODY is executed at least -once (and only once, unless executing BODY makes CONDITION true). -Contrast this with the corresponding `while' statement: - - while (CONDITION) - BODY - -This statement will not execute BODY even once if CONDITION is false -to begin with. - -Here is an example of a `do' statement: - - awk '{ i = 1 - do { - print $0 - i++ - } while (i <= 10) - }' - -prints each input record ten times. It isn't a very realistic -example, since in this case an ordinary `while' would do just as -well. But this is normal; there is only occasionally a real use for -a `do' statement. - - - -File: gawk-info, Node: For, Next: Break, Prev: Do, Up: Statements - -The `for' Statement -=================== - -The `for' statement makes it more convenient to count iterations of a -loop. The general form of the `for' statement looks like this: - - for (INITIALIZATION; CONDITION; INCREMENT) - BODY - -This statement starts by executing INITIALIZATION. Then, as long as -CONDITION is true, it repeatedly executes BODY and then INCREMENT. -Typically INITIALIZATION sets a variable to either zero or one, -INCREMENT adds 1 to it, and CONDITION compares it against the desired -number of iterations. 
- -Here is an example of a `for' statement: - - awk '{ for (i = 1; i <= 3; i++) - print $i - }' - -This prints the first three fields of each input record, one field -per line. - -In the `for' statement, BODY stands for any statement, but -INITIALIZATION, CONDITION and INCREMENT are just expressions. You -cannot set more than one variable in the INITIALIZATION part unless -you use a multiple assignment statement such as `x = y = 0', which is -possible only if all the initial values are equal. (But you can -initialize additional variables by writing their assignments as -separate statements preceding the `for' loop.) - -The same is true of the INCREMENT part; to increment additional -variables, you must write separate statements at the end of the loop. -The C compound expression, using C's comma operator, would be useful -in this context, but it is not supported in `awk'. - -Most often, INCREMENT is an increment expression, as in the example -above. But this is not required; it can be any expression whatever. -For example, this statement prints odd numbers from 1 to 100: - - # print odd numbers from 1 to 100 - for (i = 1; i <= 100; i += 2) - print i - -Any of the three expressions following `for' may be omitted if you -don't want it to do anything. Thus, `for (;x > 0;)' is equivalent to -`while (x > 0)'. If the CONDITION part is empty, it is treated as -TRUE, effectively yielding an infinite loop. - -In most cases, a `for' loop is an abbreviation for a `while' loop, as -shown here: - - INITIALIZATION - while (CONDITION) { - BODY - INCREMENT - } - -(The only exception is when the `continue' statement (*note -Continue::.) is used inside the loop; changing a `for' statement to a -`while' statement in this way can change the effect of the `continue' -statement inside the loop.) - -The `awk' language has a `for' statement in addition to a `while' -statement because often a `for' loop is both less work to type and -more natural to think of. 
Counting the number of iterations is very -common in loops. It can be easier to think of this counting as part -of looping rather than as something to do inside the loop. - -The next section has more complicated examples of `for' loops. - -There is an alternate version of the `for' loop, for iterating over -all the indices of an array: - - for (i in array) - PROCESS array[i] - -*Note Arrays::, for more information on this version of the `for' loop. - - - -File: gawk-info, Node: Break, Next: Continue, Prev: For, Up: Statements - -The `break' Statement -===================== - -The `break' statement jumps out of the innermost `for', `while', or -`do'--`while' loop that encloses it. The following example finds the -smallest divisor of any number, and also identifies prime numbers: - - awk '# find smallest divisor of num - { num = $1 - for (div = 2; div*div <= num; div++) - if (num % div == 0) - break - if (num % div == 0) - printf "Smallest divisor of %d is %d\n", num, div - else - printf "%d is prime\n", num }' - -When the remainder is zero in the first `if' statement, `awk' -immediately "breaks" out of the containing `for' loop. This means -that `awk' proceeds immediately to the statement following the loop -and continues processing. (This is very different from the `exit' -statement (*note Exit::.) which stops the entire `awk' program.) - -Here is another program equivalent to the previous one. 
It -illustrates how the CONDITION of a `for' or `while' could just as -well be replaced with a `break' inside an `if': - - awk '# find smallest divisor of num - { num = $1 - for (div = 2; ; div++) { - if (num % div == 0) { - printf "Smallest divisor of %d is %d\n", num, div - break - } - if (div*div > num) { - printf "%d is prime\n", num - break - } - } - }' - - - -File: gawk-info, Node: Continue, Next: Next, Prev: Break, Up: Statements - -The `continue' Statement -======================== - -The `continue' statement, like `break', is used only inside `for', -`while', and `do'--`while' loops. It skips over the rest of the loop -body, causing the next cycle around the loop to begin immediately. -Contrast this with `break', which jumps out of the loop altogether. -Here is an example: - - # print names that don't contain the string "ignore" - - # first, save the text of each line - { names[NR] = $0 } - - # print what we're interested in - END { - for (x in names) { - if (names[x] ~ /ignore/) - continue - print names[x] - } - } - -If any of the input records contain the string `ignore', this example -skips the print statement and continues back to the first statement -in the loop. - -This isn't a practical example of `continue', since it would be just -as easy to write the loop like this: - - for (x in names) - if (names[x] !~ /ignore/) - print names[x] - -The `continue' statement causes `awk' to skip the rest of what is -inside a `for' loop, but it resumes execution with the increment part -of the `for' loop. The following program illustrates this fact: - - awk 'BEGIN { - for (x = 0; x <= 20; x++) { - if (x == 5) - continue - printf ("%d ", x) - } - print "" - }' - -This program prints all the numbers from 0 to 20, except for 5, for -which the `printf' is skipped. Since the increment `x++' is not -skipped, `x' does not remain stuck at 5. 
- - - -File: gawk-info, Node: Next, Next: Exit, Prev: Continue, Up: Statements - -The `next' Statement -==================== - -The `next' statement forces `awk' to immediately stop processing the -current record and go on to the next record. This means that no -further rules are executed for the current record. The rest of the -current rule's action is not executed either. - -Contrast this with the effect of the `getline' function (*note -Getline::.). That too causes `awk' to read the next record -immediately, but it does not alter the flow of control in any way. -So the rest of the current action executes with a new input record. - -At the grossest level, `awk' program execution is a loop that reads -an input record and then tests each rule pattern against it. If you -think of this loop as a `for' statement whose body contains the -rules, then the `next' statement is analogous to a `continue' -statement: it skips to the end of the body of the loop, and executes -the increment (which reads another record). - -For example, if your `awk' program works only on records with four -fields, and you don't want it to fail when given bad input, you might -use the following rule near the beginning of the program: - - NF != 4 { - printf ("line %d skipped: doesn't have 4 fields", FNR) > "/dev/tty" - next - } - -so that the following rules will not see the bad record. The error -message is redirected to `/dev/tty' (the terminal), so that it won't -get lost amid the rest of the program's regular output. - - - -File: gawk-info, Node: Exit, Prev: Next, Up: Statements - -The `exit' Statement -==================== - -The `exit' statement causes `awk' to immediately stop executing the -current rule and to stop processing input; any remaining input is -ignored. - -If an `exit' statement is executed from a `BEGIN' rule the program -stops processing everything immediately. No input records will be -read. However, if an `END' rule is present, it will be executed -(*note BEGIN/END::.). 
- -If `exit' is used as part of an `END' rule, it causes the program to -stop immediately. - -An `exit' statement that is part of an ordinary rule (that is, not part -of a `BEGIN' or `END' rule) stops the execution of any further -automatic rules, but the `END' rule is executed if there is one. If -you don't want the `END' rule to do its job in this case, you can set -a variable to nonzero before the `exit' statement, and check that -variable in the `END' rule. - -If an argument is supplied to `exit', its value is used as the exit -status code for the `awk' process. If no argument is supplied, -`exit' returns status zero (success). - -For example, let's say you've discovered an error condition you -really don't know how to handle. Conventionally, programs report -this by exiting with a nonzero status. Your `awk' program can do -this using an `exit' statement with a nonzero argument. Here's an -example of this: - - BEGIN { - if (("date" | getline date_now) < 0) { - print "Can't get system date" - exit 4 - } - } - - - -File: gawk-info, Node: Arrays, Next: Built-in, Prev: Statements, Up: Top - -Actions: Using Arrays in `awk' -****************************** - -An "array" is a table of various values, called "elements". The -elements of an array are distinguished by their "indices". Names of -arrays in `awk' are strings of alphanumeric characters and -underscores, just like regular variables. - -You cannot use the same identifier as both a variable and as an array -name in one `awk' program. - -* Menu: - -* Intro: Array Intro. Basic facts about arrays in `awk'. -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Example: Array Example. Sample program explained. - -* Scanning an Array:: A variation of the `for' statement. It loops - through the indices of an array's existing elements. - -* Delete:: The `delete' statement removes an element from an array. 
- -* Multi-dimensional:: Emulating multi--dimensional arrays in `awk'. -* Multi-scanning:: Scanning multi--dimensional arrays. - - - -File: gawk-info, Node: Array Intro, Next: Reference to Elements, Up: Arrays - -Introduction to Arrays -====================== - -The `awk' language has one--dimensional "arrays" for storing groups -of related strings or numbers. Each array must have a name; valid -array names are the same as valid variable names, and they do -conflict with variable names: you can't have both an array and a -variable with the same name at any point in an `awk' program. - -Arrays in `awk' superficially resemble arrays in other programming -languages; but there are fundamental differences. In `awk', you -don't need to declare the size of an array before you start to use it. -What's more, in `awk' any number or even a string may be used as an -array index. - -In most other languages, you have to "declare" an array and specify -how many elements or components it has. In such languages, the -declaration causes a contiguous block of memory to be allocated for -that many elements. An index in the array must be a positive -integer; for example, the index 0 specifies the first element in the -array, which is actually stored at the beginning of the block of -memory. Index 1 specifies the second element, which is stored in -memory right after the first element, and so on. It is impossible to -add more elements to the array, because it has room for only as many -elements as you declared. (Some languages have arrays whose first -index is 1, others require that you specify both the first and last -index when you declare the array. In such a language, an array could -be indexed, for example, from -3 to 17.) 
A contiguous array of four -elements might look like this, conceptually, if the element values -are 8, `"foo"', `""' and 30: - - +--------+--------+-------+--------+ - | 8 | "foo" | "" | 30 | value - +--------+--------+-------+--------+ - 0 1 2 3 index - -Only the values are stored; the indices are implicit from the order -of the values. 8 is the value at index 0, because 8 appears in the -position with 0 elements before it. - -Arrays in `awk' are different: they are "associative". This means -that each array is a collection of pairs: an index, and its -corresponding array element value: - - Element 4 Value 30 - Element 2 Value "foo" - Element 1 Value 8 - Element 3 Value "" - -We have shown the pairs in jumbled order because their order doesn't -mean anything. - -One advantage of an associative array is that new pairs can be added -at any time. For example, suppose we add to that array a tenth -element whose value is `"number ten"'. The result is this: - - Element 10 Value "number ten" - Element 4 Value 30 - Element 2 Value "foo" - Element 1 Value 8 - Element 3 Value "" - -Now the array is "sparse" (i.e. some indices are missing): it has -elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or 9. - -Another consequence of associative arrays is that the indices don't -have to be positive integers. Any number, or even a string, can be -an index. For example, here is an array which translates words from -English into French: - - Element "dog" Value "chien" - Element "cat" Value "chat" - Element "one" Value "un" - Element 1 Value "un" - -Here we decided to translate the number 1 in both spelled--out and -numeral form--thus illustrating that a single array can have both -numbers and strings as indices. - -When `awk' creates an array for you, e.g. with the `split' built--in -function (*note String Functions::.), that array's indices start at -the number one. 
- - - -File: gawk-info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays - -Referring to an Array Element -============================= - -The principal way of using an array is to refer to one of its elements. -An array reference is an expression which looks like this: - - ARRAY[INDEX] - -Here ARRAY is the name of an array. The expression INDEX is the -index of the element of the array that you want. The value of the -array reference is the current value of that array element. - -For example, `foo[4.3]' is an expression for the element of array -`foo' at index 4.3. - -If you refer to an array element that has no recorded value, the -value of the reference is `""', the null string. This includes -elements to which you have not assigned any value, and elements that -have been deleted (*note Delete::.). Such a reference automatically -creates that array element, with the null string as its value. (In -some cases, this is unfortunate, because it might waste memory inside -`awk'). - -You can find out if an element exists in an array at a certain index -with the expression: - - INDEX in ARRAY - -This expression tests whether or not the particular index exists, -without the side effect of creating that element if it is not present. -The expression has the value 1 (true) if `ARRAY[INDEX]' exists, -and 0 (false) if it does not exist. - -For example, to find out whether the array `frequencies' contains the -subscript `"2"', you would ask: - - if ("2" in frequencies) print "Subscript \"2\" is present." - -Note that this is *not* a test of whether or not the array -`frequencies' contains an element whose *value* is `"2"'. (There is -no way to do that except to scan all the elements.) Also, this *does -not* create `frequencies["2"]', while the following (incorrect) -alternative would: - - if (frequencies["2"] != "") print "Subscript \"2\" is present." 
- - - -File: gawk-info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays - -Assigning Array Elements -======================== - -Array elements are lvalues: they can be assigned values just like -`awk' variables: - - ARRAY[SUBSCRIPT] = VALUE - -Here ARRAY is the name of your array. The expression SUBSCRIPT is -the index of the element of the array that you want to assign a -value. The expression VALUE is the value you are assigning to that -element of the array. - - - -File: gawk-info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays - -Basic Example of an Array -========================= - -The following program takes a list of lines, each beginning with a -line number, and prints them out in order of line number. The line -numbers are not in order, however, when they are first read: they -are scrambled. This program sorts the lines by making an array using -the line numbers as subscripts. It then prints out the lines in -sorted order of their numbers. It is a very simple program, and will -get confused if it encounters repeated numbers, gaps, or lines that -don't begin with a number. - - BEGIN { - max=0 - } - - { - if ($1 > max) - max = $1 - arr[$1] = $0 - } - - END { - for (x = 1; x <= max; x++) - print arr[x] - } - -The first rule just initializes the variable `max'. (This is not -strictly necessary, since an uninitialized variable has the null -string as its value, and the null string is effectively zero when -used in a context where a number is required.) - -The second rule keeps track of the largest line number seen so far; -it also stores each line into the array `arr', at an index that is -the line's number. - -The third rule runs after all the input has been read, to print out -all the lines. - -When this program is run with the following input: - - 5 I am the Five man - 2 Who are you? The new number two! - 4 . . . And four on the floor - 1 Who is number one? - 3 I three you. 
- - its output is this: - - 1 Who is number one? - 2 Who are you? The new number two! - 3 I three you. - 4 . . . And four on the floor - 5 I am the Five man - - - -File: gawk-info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays - -Scanning All Elements of an Array -================================= - -In programs that use arrays, often you need a loop that will execute -once for each element of an array. In other languages, where arrays -are contiguous and indices are limited to positive integers, this is -easy: the largest index is one less than the length of the array, and -you can find all the valid indices by counting from zero up to that -value. This technique won't do the job in `awk', since any number or -string may be an array index. So `awk' has a special kind of `for' -statement for scanning an array: - - for (VAR in ARRAY) - BODY - -This loop executes BODY once for each different value that your -program has previously used as an index in ARRAY, with the variable -VAR set to that index. - -Here is a program that uses this form of the `for' statement. The -first rule scans the input records and notes which words appear (at -least once) in the input, by storing a 1 into the array `used' with -the word as index. The second rule scans the elements of `used' to -find all the distinct words that appear in the input. It prints each -word that is more than 10 characters long, and also prints the number -of such words. *Note Built-in::, for more information on the -built--in function `length'. - - # Record a 1 for each word that is used at least once. - { - for (i = 1; i <= NF; i++) - used[$i] = 1 - } - - # Find number of distinct words more than 10 characters long. - END { - num_long_words = 0 - for (x in used) - if (length(x) > 10) { - ++num_long_words - print x - } - print num_long_words, "words longer than 10 characters" - } - -*Note Sample Program::, for a more detailed example of this type. 
- -The order in which elements of the array are accessed by this -statement is determined by the internal arrangement of the array -elements within `awk' and cannot be controlled or changed. This can -lead to problems if new elements are added to ARRAY by statements in -BODY; you cannot predict whether or not the `for' loop will reach -them. Similarly, changing VAR inside the loop can produce strange -results. It is best to avoid such things. - - - -File: gawk-info, Node: Delete, Next: Multi-dimensional, Prev: Scanning an Array, Up: Arrays - -The `delete' Statement -====================== - -You can remove an individual element of an array using the `delete' -statement: - - delete ARRAY[INDEX] - -When an array element is deleted, it is as if you had never referred -to it and had never given it any value. Any value the element -formerly had can no longer be obtained. - -Here is an example of deleting elements in an array: - - awk '{ for (i in frequencies) - delete frequencies[i] - }' - -This example removes all the elements from the array `frequencies'. - -If you delete an element, the `for' statement to scan the array will -not report that element, and the `in' operator to check for the -presence of that element will return 0: - - delete foo[4] - if (4 in foo) - print "This will never be printed" - - - -File: gawk-info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Delete, Up: Arrays - -Multi--dimensional arrays -========================= - -A multi--dimensional array is an array in which an element is -identified by a sequence of indices, not a single index. For -example, a two--dimensional array requires two indices. The usual -way (in most languages, including `awk') to refer to an element of a -two--dimensional array named `grid' is with `grid[x,y]'. - -Multi--dimensional arrays are supported in `awk' through -concatenation of indices into one string. What happens is that `awk' -converts the indices into strings (*note Conversion::.) 
and -concatenates them together, with a separator between them. This -creates a single string that describes the values of the separate -indices. The combined string is used as a single index into an -ordinary, one--dimensional array. The separator used is the value of -the special variable `SUBSEP'. - -For example, suppose the value of `SUBSEP' is `","' and the -expression `foo[5,12]="value"' is executed. The numbers 5 and 12 -will be concatenated with a comma between them, yielding `"5,12"'; -thus, the array element `foo["5,12"]' will be set to `"value"'. - -Once the element's value is stored, `awk' has no record of whether it -was stored with a single index or a sequence of indices. The two -expressions `foo[5,12]' and `foo[5 SUBSEP 12]' always have the same -value. - -The default value of `SUBSEP' is not a comma; it is the string -`"\034"', which contains a nonprinting character that is unlikely to -appear in an `awk' program or in the input data. - -The usefulness of choosing an unlikely character comes from the fact -that index values that contain a string matching `SUBSEP' lead to -combined strings that are ambiguous. Suppose that `SUBSEP' is a -comma; then `foo["a,b", "c"]' and `foo["a", "b,c"]' will be -indistinguishable because both are actually stored as `foo["a,b,c"]'. -Because `SUBSEP' is `"\034"', such confusion can actually happen only -when an index contains the character `"\034"', which is a rare event. - -You can test whether a particular index--sequence exists in a -``multi--dimensional'' array with the same operator `in' used for -single dimensional arrays. Instead of a single index as the -left--hand operand, write the whole sequence of indices, separated by -commas, in parentheses: - - (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY - -The following example treats its input as a two--dimensional array of -fields; it rotates this array 90 degrees clockwise and prints the -result. It assumes that all lines have the same number of elements. 
- - awk 'BEGIN { - max_nf = max_nr = 0 - } - - { - if (max_nf < NF) - max_nf = NF - max_nr = NR - for (x = 1; x <= NF; x++) - vector[x, NR] = $x - } - - END { - for (x = 1; x <= max_nf; x++) { - for (y = max_nr; y >= 1; --y) - printf("%s ", vector[x, y]) - printf("\n") - } - }' - -When given the input: - - 1 2 3 4 5 6 - 2 3 4 5 6 1 - 3 4 5 6 1 2 - 4 5 6 1 2 3 - -it produces: - - 4 3 2 1 - 5 4 3 2 - 6 5 4 3 - 1 6 5 4 - 2 1 6 5 - 3 2 1 6 - - - -File: gawk-info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays - -Scanning Multi--dimensional Arrays -================================== - -There is no special `for' statement for scanning a -``multi--dimensional'' array; there cannot be one, because in truth -there are no multi--dimensional arrays or elements; there is only a -multi--dimensional *way of accessing* an array. - -However, if your program has an array that is always accessed as -multi--dimensional, you can get the effect of scanning it by -combining the scanning `for' statement (*note Scanning an Array::.) -with the `split' built--in function (*note String Functions::.). It -works like this: - - for (combined in ARRAY) { - split (combined, separate, SUBSEP) - ... - } - -This finds each concatenated, combined index in the array, and splits -it into the individual indices by breaking it apart where the value -of `SUBSEP' appears. The split--out indices become the elements of -the array `separate'. - -Thus, suppose you have previously stored in `ARRAY[1, "foo"]'; then -an element with index `"1\034foo"' exists in ARRAY. (Recall that the -default value of `SUBSEP' contains the character with code 034.) -Sooner or later the `for' statement will find that index and do an -iteration with `combined' set to `"1\034foo"'. Then the `split' -function will be called as follows: - - split ("1\034foo", separate, "\034") - -The result of this is to set `separate[1]' to 1 and `separate[2]' to -`"foo"'. 
Presto, the original sequence of separate indices has been -recovered. - - - -File: gawk-info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top - -Built--in functions -******************* - -"Built--in" functions are functions always available for your `awk' -program to call. This chapter defines all the built--in functions -that exist; some of them are mentioned in other sections, but they -are summarized here for your convenience. (You can also define new -functions yourself. *Note User-defined::.) - -In most cases, any extra arguments given to built--in functions are -ignored. The defaults for omitted arguments vary from function to -function and are described under the individual functions. - -The name of a built--in function need not be followed immediately by -the opening left parenthesis of the arguments; whitespace is allowed. -However, it is wise to write no space there, since user--defined -functions do not allow space. - -When a function is called, expressions that create the function's -actual parameters are evaluated completely before the function call -is performed. For example, in the code fragment: - - i = 4 - j = myfunc(i++) - -the variable `i' will be set to 5 before `myfunc' is called with a -value of 4 for its actual parameter. - -* Menu: - -* Numeric Functions:: Functions that work with numbers, - including `int', `sin' and `rand'. - -* String Functions:: Functions for string manipulation, - such as `split', `match', and `sprintf'. - -* I/O Functions:: Functions for files and shell commands - - - -File: gawk-info, Node: Numeric Functions, Next: String Functions, Up: Built-in - -Numeric Built--in Functions -=========================== - -The general syntax of the numeric built--in functions is the same for -each. Here is an example of that syntax: - - awk '# Read input records containing a pair of points: x0, y0, x1, y1. - # Print the points and the distance between them. 
- { printf "%f %f %f %f %f\n", $1, $2, $3, $4, - sqrt(($3-$1) * ($3-$1) + ($4-$2) * ($4-$2)) }' - -This calculates the square root of a calculation that uses the values -of the fields. It then prints the first four fields of the input -record and the result of the square root calculation. - -Here is the full list of numeric built--in functions: - -`int(X)' - This gives you the integer part of X, truncated toward 0. This - produces the nearest integer to X, located between X and 0. - - For example, `int(3)' is 3, `int(3.9)' is 3, `int(-3.9)' is -3, - and `int(-3)' is -3 as well. - -`sqrt(X)' - This gives you the positive square root of X. It reports an - error if X is negative. - -`exp(X)' - This gives you the exponential of X, or reports an error if X is - out of range. The range of values X can have depends on your - machine's floating point representation. - -`log(X)' - This gives you the natural logarithm of X, if X is positive; - otherwise, it reports an error. - -`sin(X)' - This gives you the sine of X, with X in radians. - -`cos(X)' - This gives you the cosine of X, with X in radians. - -`atan2(Y, X)' - This gives you the arctangent of Y/X, with the result in radians. - -`rand()' - This gives you a random number. The values of `rand()' are - uniformly--distributed between 0 and 1. The value is never 0 - and never 1. - - Often you want random integers instead. Here is a user--defined - function you can use to obtain a random nonnegative integer less - than N: - - function randint(n) { - return int(n * rand()) - } - - The multiplication produces a random real number at least 0, and - less than N. We then make it an integer (using `int') between 0 - and `N-1'. - - Here is an example where a similar function is used to produce - random integers between 1 and N: - - awk ' - # Function to roll a simulated die. - function roll(n) { return 1 + int(rand() * n) } - - # Roll 3 six--sided dice and print total number of points. 
- { - printf("%d points\n", roll(6)+roll(6)+roll(6)) - }' - - *Note* that `rand()' starts generating numbers from the same - point, or "seed", each time you run `awk'. This means that the - same program will produce the same results each time you run it. - The numbers are random within one `awk' run, but predictable - from run to run. This is convenient for debugging, but if you - want a program to do different things each time it is used, you - must change the seed to a value that will be different in each - run. To do this, use `srand'. - -`srand(X)' - The function `srand(X)' sets the starting point, or "seed", for - generating random numbers to the value X. - - Each seed value leads to a particular sequence of ``random'' - numbers. Thus, if you set the seed to the same value a second - time, you will get the same sequence of ``random'' numbers again. - - If you omit the argument X, as in `srand()', then the current - date and time of day are used for a seed. This is the way to - get random numbers that are truly unpredictable. - - The return value of `srand()' is the previous seed. This makes - it easy to keep track of the seeds for use in consistently - reproducing sequences of random numbers. - - - -File: gawk-info, Node: String Functions, Next: I/O Functions, Prev: Numeric Functions, Up: Built-in - -Built--in Functions for String Manipulation -=========================================== - -`index(IN, FIND)' - This searches the string IN for the first occurrence of the - string FIND, and returns the position where that occurrence - begins in the string IN. For example: - - awk 'BEGIN { print index("peanut", "an") }' - - prints `3'. If FIND is not found, `index' returns 0. - -`length(STRING)' - This gives you the number of characters in STRING. If STRING is - a number, the length of the digit string representing that - number is returned. For example, `length("abcde")' is 5. - Whereas, `length(15 * 35)' works out to 3. How? 
Well, 15 * 35 - = 525, and 525 is then converted to the string `"525"', which - has three characters. - -`match(STRING, REGEXP)' - The `match' function searches the string, STRING, for the - longest, leftmost substring matched by the regular expression, - REGEXP. It returns the character position, or "index", of where - that substring begins (1, if it starts at the beginning of - STRING). If no match is found, it returns 0. - - The `match' function sets the special variable `RSTART' to the - index. It also sets the special variable `RLENGTH' to the - length of the matched substring. If no match is found, `RSTART' - is set to 0, and `RLENGTH' to -1. - - For example: - - awk '{ - if ($1 == "FIND") - regex = $2 - else { - where = match($0, regex) - if (where) - print "Match of", regex, "found at", where, "in", $0 - } - }' - - This program looks for lines that match the regular expression - stored in the variable `regex'. This regular expression can be - changed. If the first word on a line is `FIND', `regex' is - changed to be the second word on that line. Therefore, given: - - FIND fo*bar - My program was a foobar - But none of it would doobar - FIND Melvin - JF+KM - This line is property of The Reality Engineering Co. - This file was created by Melvin. - - `awk' prints: - - Match of fo*bar found at 18 in My program was a foobar - Match of Melvin found at 26 in This file was created by Melvin. - -`split(STRING, ARRAY, FIELD_SEPARATOR)' - This divides STRING up into pieces separated by FIELD_SEPARATOR, - and stores the pieces in ARRAY. The first piece is stored in - `ARRAY[1]', the second piece in `ARRAY[2]', and so forth. The - string value of the third argument, FIELD_SEPARATOR, is used as - a regexp to search for to find the places to split STRING. If - the FIELD_SEPARATOR is omitted, the value of `FS' is used. - `split' returns the number of elements created. 
- - The `split' function, then, splits strings into pieces in a - manner similar to the way input lines are split into fields. - For example: - - split("auto-da-fe", a, "-") - - splits the string `auto-da-fe' into three fields using `-' as - the separator. It sets the contents of the array `a' as follows: - - a[1] = "auto" - a[2] = "da" - a[3] = "fe" - - The value returned by this call to `split' is 3. - -`sprintf(FORMAT, EXPRESSION1,...)' - This returns (without printing) the string that `printf' would - have printed out with the same arguments (*note Printf::.). For - example: - - sprintf("pi = %.2f (approx.)", 22/7) - - returns the string `"pi = 3.14 (approx.)"'. - -`sub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)' - The `sub' function alters the value of TARGET_VARIABLE. It - searches this value, which should be a string, for the leftmost - substring matched by the regular expression, REGEXP, extending - this match as far as possible. Then the entire string is - changed by replacing the matched text with REPLACEMENT_STRING. - The modified string becomes the new value of TARGET_VARIABLE. - - This function is peculiar because TARGET_VARIABLE is not simply - used to compute a value, and not just any expression will do: it - must be a variable, field or array reference, so that `sub' can - store a modified value there. If this argument is omitted, then - the default is to use and alter `$0'. - - For example: - - str = "water, water, everywhere" - sub(/at/, "ith", str) - - sets `str' to `"wither, water, everywhere"', by replacing the - leftmost, longest occurrence of `at' with `ith'. - - The `sub' function returns the number of substitutions made - (either one or zero). - - The special character, `&', in the replacement string, - REPLACEMENT_STRING, stands for the precise substring that was - matched by REGEXP. (If the regexp can match more than one - string, then this precise substring may vary.) 
For example: - - awk '{ sub(/candidate/, "& and his wife"); print }' - - will change the first occurrence of ``candidate'' to ``candidate - and his wife'' on each input line. - - The effect of this special character can be turned off by - preceding it with a backslash (`\&'). To include a backslash in - the replacement string, it too must be preceded with a (second) - backslash. - - Note: if you use `sub' with a third argument that is not a - variable, field or array element reference, then it will still - search for the pattern and return 0 or 1, but the modified - string is thrown away because there is no place to put it. For - example: - - sub(/USA/, "United States", "the USA and Canada") - - will indeed produce a string `"the United States and Canada"', - but there will be no way to use that string! - -`gsub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)' - This is similar to the `sub' function, except `gsub' replaces - *all* of the longest, leftmost, *non--overlapping* matching - substrings it can find. The ``g'' in `gsub' stands for - "global", which means replace *everywhere*. For example: - - awk '{ gsub(/Britain/, "United Kingdom"); print }' - - replaces all occurrences of the string `Britain' with `United - Kingdom' for all input records. - - The `gsub' function returns the number of substitutions made. - If the variable to be searched and altered, TARGET_VARIABLE, is - omitted, then the entire input record, `$0', is used. - - The characters `&' and `\' are special in `gsub' as they are in - `sub' (see immediately above). - -`substr(STRING, START, LENGTH)' - This returns a LENGTH--character--long substring of STRING, - starting at character number START. The first character of a - string is character number one. For example, - `substr("washington", 5, 3)' returns `"ing"'. - - If LENGTH is not present, this function returns the whole suffix - of STRING that begins at character number START. For example, - `substr("washington", 5)' returns `"ington"'. 
- - - -File: gawk-info, Node: I/O Functions, Prev: String Functions, Up: Built-in - -Built--in Functions for I/O to Files and Commands -================================================= - -`close(FILENAME)' - Close the file FILENAME. The argument may alternatively be a - shell command that was used for redirecting to or from a pipe; - then the pipe is closed. - - *Note Close Input::, regarding closing input files and pipes. - *Note Close Output::, regarding closing output files and pipes. - -`system(COMMAND)' - The system function allows the user to execute operating system - commands and then return to the `awk' program. The `system' - function executes the command given by the string value of - COMMAND. It returns, as its value, the status returned by the - command that was executed. This is known as returning the "exit - status". - - For example, if the following fragment of code is put in your - `awk' program: - - END { - system("mail -s 'awk run done' operator < /dev/null") - } - - the system operator will be sent mail when the `awk' program - finishes processing input and begins its end--of--input - processing. - - Note that much the same result can be obtained by redirecting - `print' or `printf' into a pipe. However, if your `awk' program - is interactive, this function is useful for cranking up large - self--contained programs, such as a shell or an editor. - - - -File: gawk-info, Node: User-defined, Next: Special, Prev: Built-in, Up: Top - -User--defined Functions -*********************** - -Complicated `awk' programs can often be simplified by defining your -own functions. User--defined functions can be called just like -built--in ones (*note Function Calls::.), but it is up to you to -define them--to tell `awk' what they should do. - -* Menu: - -* Definition Syntax:: How to write definitions and what they mean. -* Function Example:: An example function definition and what it does. -* Function Caveats:: Things to watch out for. 
-* Return Statement:: Specifying the value a function returns. - - - -File: gawk-info, Node: Definition Syntax, Next: Function Example, Up: User-defined - -Syntax of Function Definitions -============================== - -The definition of a function named NAME looks like this: - - function NAME (PARAMETER-LIST) { - BODY-OF-FUNCTION - } - -A valid function name is like a valid variable name: a sequence of -letters, digits and underscores, not starting with a digit. - -Such function definitions can appear anywhere between the rules of -the `awk' program. The general format of an `awk' program, then, is -now modified to include sequences of rules *and* user--defined -function definitions. - -The function definition need not precede all the uses of the function. -This is because `awk' reads the entire program before starting to -execute any of it. - -The PARAMETER-LIST is a list of the function's "local" variable -names, separated by commas. Within the body of the function, local -variables refer to arguments with which the function is called. If -the function is called with fewer arguments than it has local -variables, this is not an error; the extra local variables are simply -set as the null string. - -The local variable values hide or "shadow" any variables of the same -names used in the rest of the program. The shadowed variables are -not accessible in the function definition, because there is no way to -name them while their names have been taken away for the local -variables. All other variables used in the `awk' program can be -referenced or set normally in the function definition. - -The local variables last only as long as the function is executing. -Once the function finishes, the shadowed variables come back. - -The BODY-OF-FUNCTION part of the definition is the most important -part, because this is what says what the function should actually *do*. -The local variables exist to give the body a way to talk about the -arguments. 
- -Functions may be "recursive", i.e., they can call themselves, either -directly, or indirectly (via calling a second function that calls the -first again). - -The keyword `function' may also be written `func'. - - - -File: gawk-info, Node: Function Example, Next: Function Caveats, Prev: Definition Syntax, Up: User-defined - -Function Definition Example -=========================== - -Here is an example of a user--defined function, called `myprint', -that takes a number and prints it in a specific format. - - function myprint(num) - { - printf "%6.3g\n", num - } - -To illustrate, let's use the following `awk' rule to use, or "call", -our `myprint' function: - - $3 > 0 { myprint($3) }' - -This program prints, in our special format, all the third fields that -contain a positive number in our input. Therefore, when given: - - 1.2 3.4 5.6 7.8 - 9.10 11.12 13.14 15.16 - 17.18 19.20 21.22 23.24 - -this program, using our function to format the results, will print: - - 5.6 - 13.1 - 21.2 - -Here is a rather contrived example of a recursive function. It -prints a string backwards: - - function rev (str, len) { - if (len == 0) { - printf "\n" - return - } - printf "%c", substr(str, len, 1) - rev(str, len - 1) - } - - - -File: gawk-info, Node: Function Caveats, Next: Return Statement, Prev: Function Example, Up: User-defined - -Caveats of Function Calling -=========================== - -*Note* that there cannot be any blanks between the function name and -the left parenthesis of the argument list, when calling a function. -This is so `awk' can tell you are not trying to concatenate the value -of a variable with the value of an expression inside the parentheses. - -When a function is called, it is given a *copy* of the values of its -arguments. This is called "passing by value". The caller may use a -variable as the expression for the argument, but the called function -does not know this: all it knows is what value the argument had. 
For -example, if you write this code: - - foo = "bar" - z = myfunc(foo) - -then you should not think of the argument to `myfunc' as being ``the -variable `foo'''. Instead, think of the argument as the string -value, `"bar"'. - -If the function `myfunc' alters the values of its local variables, -this has no effect on any other variables. In particular, if -`myfunc' does this: - - function myfunc (win) { - print win - win = "zzz" - print win - } - -to change its first argument variable `win', this *does not* change -the value of `foo' in the caller. The role of `foo' in calling -`myfunc' ended when its value, `"bar"', was computed. If `win' also -exists outside of `myfunc', this definition will not change it--that -value is shadowed during the execution of `myfunc' and cannot be seen -or changed from there. - -However, when arrays are the parameters to functions, they are *not* -copied. Instead, the array itself is made available for direct -manipulation by the function. This is usually called "passing by -reference". Changes made to an array parameter inside the body of a -function *are* visible outside that function. *This can be very -dangerous if you don't watch what you are doing.* For example: - - function changeit (array, ind, nvalue) { - array[ind] = nvalue - } - - BEGIN { - a[1] = 1 ; a[2] = 2 ; a[3] = 3 - changeit(a, 2, "two") - printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3] - } - -will print `a[1] = 1, a[2] = two, a[3] = 3', because the call to -`changeit' stores `"two"' in the second element of `a'. - - - -File: gawk-info, Node: Return Statement, Prev: Function Caveats, Up: User-defined - -The `return' statement -====================== - -The body of a user--defined function can contain a `return' statement. -This statement returns control to the rest of the `awk' program. It -can also be used to return a value for use in the rest of the `awk' -program. It looks like: - - `return EXPRESSION' - -The EXPRESSION part is optional. 
If it is omitted, then the returned -value is undefined and, therefore, unpredictable. - -A `return' statement with no value expression is assumed at the end -of every function definition. So if control reaches the end of the -function definition, then the function returns an unpredictable value. - -Here is an example of a user--defined function that returns a value -for the largest number among the elements of an array: - - function maxelt (vec, i, ret) { - for (i in vec) { - if (ret == "" || vec[i] > ret) - ret = vec[i] - } - return ret - } - -You call `maxelt' with one argument, an array name. The local -variables `i' and `ret' are not intended to be arguments; while there -is nothing to stop you from passing two or three arguments to -`maxelt', the results would be strange. - -When writing a function definition, it is conventional to separate -the parameters from the local variables with extra spaces, as shown -above in the definition of `maxelt'. - -Here is a program that uses, or calls, our `maxelt' function. This -program loads an array, calls `maxelt', and then reports the maximum -number in that array: - - awk ' - function maxelt (vec, i, ret) { - for (i in vec) { - if (ret == "" || vec[i] > ret) - ret = vec[i] - } - return ret - } - - # Load all fields of each record into nums. - { - for(i = 1; i <= NF; i++) - nums[NR, i] = $i - } - - END { - print maxelt(nums) - }' - -Given the following input: - - 1 5 23 8 16 - 44 3 5 2 8 26 - 256 291 1396 2962 100 - -6 467 998 1101 - 99385 11 0 225 - -our program tells us (predictably) that: - - 99385 - -is the largest number in our array. - - - -File: gawk-info, Node: Special, Next: Sample Program, Prev: User-defined, Up: Top - -Special Variables -***************** - -Most `awk' variables are available for you to use for your own -purposes; they will never change except when your program assigns -them, and will never affect anything except when your program -examines them. - -A few variables have special meanings. 
Some of them `awk' examines -automatically, so that they enable you to tell `awk' how to do -certain things. Others are set automatically by `awk', so that they -carry information from the internal workings of `awk' to your program. - -Most of these variables are also documented in the chapters where -their areas of activity are described. - -* Menu: - -* User-modified:: Special variables that you change to control `awk'. - -* Auto-set:: Special variables where `awk' gives you information. - - - -File: gawk-info, Node: User-modified, Next: Auto-set, Up: Special - -Special Variables That Control `awk' -==================================== - -This is a list of the variables which you can change to control how -`awk' does certain things. - -`FS' - `FS' is the input field separator (*note Field Separators::.). - The value is a regular expression that matches the separations - between fields in an input record. - - The default value is `" "', a string consisting of a single - space. As a special exception, this value actually means that - any sequence of spaces and tabs is a single separator. It also - causes spaces and tabs at the beginning or end of a line to be - ignored. - - You can set the value of `FS' on the command line using the `-F' - option: - - awk -F, 'PROGRAM' INPUT-FILES - -`OFMT' - This string is used by `awk' to control conversion of numbers to - strings (*note Conversion::.). It works by being passed, in - effect, as the first argument to the `sprintf' function. Its - default value is `"%.6g"'. - -`OFS' - This is the output field separator (*note Output Separators::.). - It is output between the fields output by a `print' statement. - Its default value is `" "', a string consisting of a single space. - -`ORS' - This is the output record separator (*note Output - Separators::.). It is output at the end of every `print' - statement. Its default value is the newline character, often - represented in `awk' programs as `\n'. 
- -`RS' - This is `awk''s record separator (*note Records::.). Its - default value is a string containing a single newline character, - which means that an input record consists of a single line of - text. - -`SUBSEP' - `SUBSEP' is a subscript separator (*note Multi-dimensional::.). - It has the default value of `"\034"', and is used to separate - the parts of the name of a multi--dimensional array. Thus, if - you access `foo[12,3]', it really accesses `foo["12\0343"]'. - - - -File: gawk-info, Node: Auto-set, Prev: User-modified, Up: Special - -Special Variables That Convey Information to You -================================================ - -This is a list of the variables that are set automatically by `awk' -on certain occasions so as to provide information for your program. - -`ARGC' -`ARGV' - The command--line arguments available to `awk' are stored in an - array called `ARGV'. `ARGC' is the number of command--line - arguments present. `ARGV' is indexed from zero to `ARGC' - 1. - For example: - - awk '{ print ARGV[$1] }' inventory-shipped BBS-list - - In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains - `"inventory-shipped"', and `ARGV[2]' contains `"BBS-list"'. - `ARGC' is 3, one more than the index of the last element in - `ARGV' since the elements are numbered from zero. - - Notice that the `awk' program is not treated as an argument. - The `-f' `FILENAME' option, and the `-F' option, are also not - treated as arguments for this purpose. - - Variable assignments on the command line *are* treated as - arguments, and do show up in the `ARGV' array. - - Your program can alter `ARGC' and the elements of `ARGV'. Each time - `awk' reaches the end of an input file, it uses the next element - of `ARGV' as the name of the next input file. By storing a - different string there, your program can change which files are - read. You can use `-' to represent the standard input. 
By - storing additional elements and incrementing `ARGC' you can - cause additional files to be read. - - If you decrease the value of `ARGC', that eliminates input files - from the end of the list. By recording the old value of `ARGC' - elsewhere, your program can treat the eliminated arguments as - something other than file names. - - To eliminate a file from the middle of the list, store the null - string (`""') into `ARGV' in place of the file's name. As a - special feature, `awk' ignores file names that have been - replaced with the null string. - -`ENVIRON' - This is an array that contains the values of the environment. - The array indices are the environment variable names; the values - are the values of the particular environment variables. For - example, `ENVIRON["HOME"]' might be `/u/close'. Changing this - array does not affect the environment passed on to any programs - that `awk' may spawn via redirection or the `system' function. - (This may not work under operating systems other than MS-DOS, - Unix, or GNU.) - -`FILENAME' - This is the name of the file that `awk' is currently reading. - If `awk' is reading from the standard input (in other words, - there are no files listed on the command line), `FILENAME' is - set to `"-"'. `FILENAME' is changed each time a new file is - read (*note Reading Files::.). - -`FNR' - `FNR' is the current record number in the current file. `FNR' - is incremented each time a new record is read (*note Getline::.). - It is reinitialized to 0 each time a new input file is started. - -`NF' - `NF' is the number of fields in the current input record. `NF' - is set each time a new record is read, when a new field is - created, or when $0 changes (*note Fields::.). - -`NR' - This is the number of input records `awk' has processed since - the beginning of the program's execution (*note Records::.). - `NR' is set each time a new record is read. 
- -`RLENGTH' - `RLENGTH' is the length of the string matched by the `match' - function (*note String Functions::.). `RLENGTH' is set by - invoking the `match' function. Its value is the length of the - matched string, or -1 if no match was found. - -`RSTART' - `RSTART' is the start of the string matched by the `match' - function (*note String Functions::.). `RSTART' is set by - invoking the `match' function. Its value is the position of the - string where the matched string starts, or 0 if no match was - found. - - - -File: gawk-info, Node: Sample Program, Next: Notes, Prev: Special, Up: Top - -Sample Program -************** - -The following example is a complete `awk' program, which prints the -number of occurrences of each word in its input. It illustrates the -associative nature of `awk' arrays by using strings as subscripts. -It also demonstrates the `for X in ARRAY' construction. Finally, it -shows how `awk' can be used in conjunction with other utility -programs to do a useful task of some complexity with a minimum of -effort. Some explanations follow the program listing. - - awk ' - # Print list of word frequencies - { - for (i = 1; i <= NF; i++) - freq[$i]++ - } - - END { - for (word in freq) - printf "%s\t%d\n", word, freq[word] - }' - -The first thing to notice about this program is that it has two -rules. The first rule, because it has an empty pattern, is executed -on every line of the input. It uses `awk''s field--accessing -mechanism (*note Fields::.) to pick out the individual words from the -line, and the special variable `NF' (*note Special::.) to know how -many fields are available. - -For each input word, an element of the array `freq' is incremented to -reflect that the word has been seen an additional time. - -The second rule, because it has the pattern `END', is not executed -until the input has been exhausted. It prints out the contents of -the `freq' table that has been built up inside the first action. 
- -Note that this program has several problems that would prevent it -from being useful by itself on real text files: - - * Words are detected using the `awk' convention that fields are - separated by whitespace and that other characters in the input - (except newlines) don't have any special meaning to `awk'. This - means that punctuation characters count as part of words. - - * The `awk' language considers upper and lower case characters to - be distinct. Therefore, `foo' and `Foo' will not be treated by - this program as the same word. This is undesirable since in - normal text, words are capitalized if they begin sentences, and - a frequency analyzer should not be sensitive to that. - - * The output does not come out in any useful order. You're more - likely to be interested in which words occur most frequently, or - having an alphabetized table of how frequently each word occurs. - -The way to solve these problems is to use other operating system -utilities to process the input and output of the `awk' script. -Suppose the script shown above is saved in the file `frequency.awk'. -Then the shell command: - - tr A-Z a-z < file1 | tr -cd 'a-z\012' \ - | awk -f frequency.awk \ - | sort +1 -nr - -produces a table of the words appearing in `file1' in order of -decreasing frequency. - -The first `tr' command in this pipeline translates all the upper case -characters in `file1' to lower case. The second `tr' command deletes -all the characters in the input except lower case characters and -newlines. The second argument to the second `tr' is quoted to -protect the backslash in it from being interpreted by the shell. The -`awk' program reads this suitably massaged data and produces a word -frequency table, which is not ordered. - -The `awk' script's output is now sorted by the `sort' command and -printed on the terminal. 
The options given to `sort' in this example -specify to sort by the second field of each input line (skipping one -field), that the sort keys should be treated as numeric quantities -(otherwise `15' would come before `5'), and that the sorting should -be done in descending (reverse) order. - -See the general operating system documentation for more information -on how to use the `tr' and `sort' commands. - - - -File: gawk-info, Node: Notes, Next: Glossary, Prev: Sample Program, Up: Top - -Implementation Notes -******************** - -This appendix contains information mainly of interest to implementors -and maintainers of `gawk'. Everything in it applies specifically to -`gawk', and not to other implementations. - -* Menu: - -* Extensions:: Things `gawk' does that Unix `awk' does not. - -* Future Extensions:: Things likely to appear in a future release. - -* Improvements:: Suggestions for future improvements. - -* Manual Improvements:: Suggestions for improvements to this manual. - - - -File: gawk-info, Node: Extensions, Next: Future Extensions, Up: Notes - -GNU Extensions to the AWK Language -================================== - -Several new features are in a state of flux. They are described here -merely to document them somewhat, but they will probably change. We -hope they will be incorporated into other versions of `awk', too. - -All of these features can be turned off either by compiling `gawk' -with `-DSTRICT', or by invoking `gawk' as `awk'. - -The `AWKPATH' environment variable - When opening a file supplied via the `-f' option, if the - filename does not contain a `/', `gawk' will perform a "path - search" for the file, similar to that performed by the shell. - `gawk' gets its search path from the `AWKPATH' environment - variable. If that variable does not exist, it uses the default - path `".:/usr/lib/awk:/usr/local/lib/awk"'. - -Case Independent Matching - Two new operators have been introduced, `~~', and `!~~'. 
These - perform regular expression match and no-match operations that - are case independent. In other words, `A' and `a' would both - match `/a/'. - -The `-i' option - This option causes the `~' and `!~' operators to behave like the - `~~' and `!~~' operators described above. - -The `-v' option - This option prints version information for this particular copy - of `gawk'. This is so you can determine if your copy of `gawk' - is up to date with respect to whatever the Free Software - Foundation is currently distributing. It may disappear in a - future version of `gawk'. - - - -File: gawk-info, Node: Future Extensions, Next: Improvements, Prev: Extensions, Up: Notes - -Extensions Likely To Appear In A Future Release -=============================================== - -Here are some more extensions that indicate the directions we are -currently considering for `gawk'. Like the previous section, this -section is also subject to change. None of these are implemented yet. - -The `IGNORECASE' special variable - If `IGNORECASE' is non--zero, then *all* regular expression - matching will be done in a case--independent fashion. The `-i' - option and the `~~' and `!~~' operators will go away, as this - mechanism generalizes those facilities. - -More Escape Sequences - The ANSI C `\a', and `\x' escape sequences will be recognized. - Unix `awk' does not recognize `\v', although `gawk' does. - -`RS' as a regexp - The meaning of `RS' will be generalized along the lines of `FS'. - -Transliteration Functions - We are planning on adding `toupper' and `tolower' functions - which will take string arguments, and return strings where the - case of each letter has been transformed to upper-- or - lower--case respectively. - -Access To System File Descriptors - `gawk' will recognize the special file names `/dev/stdin', - `/dev/stdout', `/dev/stderr', and `/dev/fd/N' internally. These - will allow access to inherited file descriptors from within an - `awk' program. 
- - - -File: gawk-info, Node: Improvements, Next: Manual Improvements, Prev: Future Extensions, Up: Notes - -Suggestions for Future Improvements -=================================== - -Here are some projects that would--be `gawk' hackers might like to -take on. They vary in size from a few days to a few weeks of -programming, depending on which one you choose and how fast a -programmer you are. Please send any improvements you write to the -maintainers at the GNU project. - - 1. State machine regexp matcher: At present, `gawk' uses the - backtracking regular expression matcher from the GNU subroutine - library. If a regexp is really going to be used a lot of times, - it is faster to convert it once to a description of a finite - state machine, then run a routine simulating that machine every - time you want to match the regexp. You could use the matching - routines used by GNU `egrep'. - - 2. Compilation of `awk' programs: `gawk' uses a `Bison' - (YACC--like) parser to convert the script given it into a syntax - tree; the syntax tree is then executed by a simple recursive - evaluator. Both of these steps incur a lot of overhead, since - parsing can be slow (especially if you also do the previous - project and convert regular expressions to finite state machines - at compile time) and the recursive evaluator performs many - procedure calls to do even the simplest things. - - It should be possible for `gawk' to convert the script's parse - tree into a C program which the user would then compile, using - the normal C compiler and a special `gawk' library to provide - all the needed functions (regexps, fields, associative arrays, - type coercion, and so on). - - An easier possibility might be for an intermediate phase of - `awk' to convert the parse tree into a linear byte code form - like the one used in GNU Emacs Lisp. 
The recursive evaluator - would then be replaced by a straight line byte code interpreter - that would be intermediate in speed between running a compiled - program and doing what `gawk' does now. - - - -File: gawk-info, Node: Manual Improvements, Prev: Improvements, Up: Notes - -Suggestions For Future Improvements of This Manual -================================================== - - 1. An error message section has not been included in this version - of the manual. Perhaps some nice beta testers will document - some of the messages for the future. - - 2. A summary page has not been included, as the ``man'', or help, - page that comes with the `gawk' code should suffice. - - GNU only supports Info, so this manual itself should contain - whatever forms of information it would be useful to have on an - Info summary page. - - 3. A function and variable index has not been included as we are - not sure what to put in it. - - 4. A section summarizing the differences between V7 `awk' and - System V Release 4 `awk' would be useful for long--time `awk' - hackers. - - - -File: gawk-info, Node: Glossary, Next: Index, Prev: Notes, Up: Top - -Glossary -******** - -Action - A series of `awk' statements attached to a rule. If the rule's - pattern matches an input record, the `awk' language executes the - rule's action. Actions are always enclosed in curly braces. - -Amazing `awk' assembler - Henry Spencer at the University of Toronto wrote a retargetable - assembler completely as `awk' scripts. It is thousands of lines - long, including machine descriptions for several 8--bit - microcomputers. It is distributed with `gawk' and is a good - example of a program that would have been better written in - another language. - -Assignment - An `awk' expression that changes the value of some `awk' - variable or data object. An object that you can assign to is - called an "lvalue". 
- -Built-in function - The `awk' language provides built--in functions that perform - various numerical and string computations. Examples are `sqrt' - (for the square root of a number) and `substr' (for a substring - of a string). - -C - The system programming language that most of GNU is written in. - The `awk' programming language has C--like syntax, and this - manual points out similarities between `awk' and C when - appropriate. - -Compound statement - A series of `awk' statements, enclosed in curly braces. - Compound statements may be nested. - -Concatenation - Concatenating two strings means sticking them together, one - after another, giving a new string. For example, the string - `foo' concatenated with the string `bar' gives the string - `foobar'. - -Conditional expression - A relation that is either true or false, such as `(a < b)'. - Conditional expressions are used in `if' and `while' statements, - and in patterns to select which input records to process. - -Curly braces - The characters `{' and `}'. Curly braces are used in `awk' for - delimiting actions, compound statements, and function bodies. - -Data objects - These are numbers and strings of characters. Numbers are - converted into strings and vice versa, as needed. - -Escape Sequences - A special sequence of characters used for describing - non--printable characters, such as `\n' for newline, or `\033' - for the ASCII ESC (escape) character. - -Field - When `awk' reads an input record, it splits the record into - pieces separated by whitespace (or by a separator regexp which - you can change by setting the special variable `FS'). Such - pieces are called fields. - -Format - Format strings are used to control the appearance of output in - the `printf' statement. Also, data conversions from numbers to - strings are controlled by the format string contained in the - special variable `OFMT'. 
- -Function - A specialized group of statements often used to encapsulate - general or program--specific tasks. `awk' has a number of - built--in functions, and also allows you to define your own. - -`gawk' - The GNU implementation of `awk'. - -`awk' language - The language in which `awk' programs are written. - -`awk' program - An `awk' program consists of a series of "patterns" and - "actions", collectively known as "rules". For each input record - given to the program, the program's rules are all processed in - turn. `awk' programs may also contain function definitions. - -`awk' script - Another name for an `awk' program. - -Input record - A single chunk of data read in by `awk'. Usually, an `awk' - input record consists of one line of text. - -Keyword - In the `awk' language, a keyword is a word that has special - meaning. Keywords are reserved and may not be used as variable - names. - - The keywords are: `if', `else', `while', `do...while', `for', - `for...in', `break', `continue', `delete', `next', `function', - `func', and `exit'. - -Lvalue - An expression that can appear on the left side of an assignment - operator. In most languages, lvalues can be variables or array - elements. In `awk', a field designator can also be used as an - lvalue. - -Number - A numeric valued data object. The `gawk' implementation uses - double precision floating point to represent numbers. - -Pattern - Patterns tell `awk' which input records are interesting to which - rules. - - A pattern is an arbitrary conditional expression against which - input is tested. If the condition is satisfied, the pattern is - said to "match" the input record. A typical pattern might - compare the input record against a regular expression. - -Range (of input lines) - A sequence of consecutive lines from the input file. A pattern - can specify ranges of input lines for `awk' to process, or it - can specify single lines. - -Recursion - When a function calls itself, either directly or indirectly. 
If - this isn't clear, refer to the entry for ``recursion''. - -Redirection - Redirection means performing input from other than the standard - input stream, or output to other than the standard output stream. - - You can redirect the output of the `print' and `printf' - statements to a file or a system command, using the `>', `>>', - and `|' operators. You can redirect input to the `getline' - statement using the `<' and `|' operators. - -Regular Expression - See ``regexp''. - -Regexp - Short for "regular expression". A regexp is a pattern that - denotes a set of strings, possibly an infinite set. For - example, the regexp `R.*xp' matches any string starting with the - letter `R' and ending with the letters `xp'. In `awk', regexps - are used in patterns and in conditional expressions. - -Rule - A segment of an `awk' program, that specifies how to process - single input records. A rule consists of a "pattern" and an - "action". `awk' reads an input record; then, for each rule, if - the input record satisfies the rule's pattern, `awk' executes - the rule's action. Otherwise, the rule does nothing for that - input record. - -Special Variable - The variables `ARGC', `ARGV', `ENVIRON', `FILENAME', `FNR', - `FS', `NF', `NR', `OFMT', `OFS', `ORS', `RLENGTH', `RSTART', - `RS', `SUBSEP', have special meaning to `awk'. Changing some of - them affects `awk''s running environment. - -Stream Editor - A program that reads records from an input stream and processes - them one or more at a time. This is in contrast with batch - programs, which may expect to read their input files in entirety - before starting to do anything, and with interactive programs, - which require input from the user. - -String - A datum consisting of a sequence of characters, such as `I am a - string'. Constant strings are written with double--quotes in - the `awk' language, and may contain "escape sequences". 
- -Whitespace - A sequence of blank or tab characters occurring inside an input - record or a string. - - - -File: gawk-info, Node: Index, Prev: Glossary, Up: Top - -Index -***** - -* Menu: - -* #!: Executable Scripts. -* -f option: Long. -* `$NF', last field in record: Fields. -* `$' (field operator): Fields. -* `>>': Redirection. -* `>': Redirection. -* `BEGIN', special pattern: BEGIN/END. -* `END', special pattern: BEGIN/END. -* `awk' language: This Manual. -* `awk' program: This Manual. -* `break' statement: Break. -* `close' statement for input: Close Input. -* `close' statement for output: Close Output. -* `continue' statement: Continue. -* `delete' statement: Delete. -* `exit' statement: Exit. -* `for (x in ...)': Scanning an Array. -* `for' statement: For. -* `if' statement: If. -* `next' statement: Next. -* `print $0': Very Simple. -* `printf' statement, format of: Basic Printf. -* `printf', format-control characters: Format-Control. -* `printf', modifiers: Modifiers. -* `print' statement: Print. -* `return' statement: Return Statement. -* `while' statement: While. -* `|': Redirection. -* `BBS-list' file: The Files. -* `inventory-shipped' file: The Files. -* Accessing fields: Fields. -* Acronym: History. -* Action, curly braces: Actions. -* Action, curly braces: Getting Started. -* Action, default: Very Simple. -* Action, definition of: Getting Started. -* Action, general: Actions. -* Action, separating statements: Actions. -* Applications of `awk': When. -* Arguments in function call: Function Calls. -* Arguments, Command Line: Command Line. -* Arithmetic operators: Arithmetic Ops. -* Array assignment: Assigning Elements. -* Array reference: Reference to Elements. -* Arrays: Array Intro. -* Arrays, definition of: Array Intro. -* Arrays, deleting an element: Delete. -* Arrays, determining presence of elements: Reference to Elements. -* Arrays, multi-dimensional subscripts: Multi-dimensional. -* Arrays, special `for' statement: Scanning an Array. 
-* Assignment operators: Assignment Ops. -* Associative arrays: Array Intro. -* Backslash Continuation: Statements/Lines. -* Basic function of `gawk': Getting Started. -* Body of a loop: While. -* Boolean expressions: Boolean Ops. -* Boolean operators: Boolean Ops. -* Boolean patterns: Boolean. -* Built-in functions, list of: Built-in. -* Built-in variables: Variables. -* Calling a function: Function Calls. -* Case sensitivity and gawk: Read Terminal. -* Changing contents of a field: Changing Fields. -* Changing the record separator: Records. -* Closing files and pipes: Close Output. -* Command Line: Command Line. -* Command line formats: Running gawk. -* Command line, setting `FS' on: Field Separators. -* Comments: Comments. -* Comparison expressions: Comparison Ops. -* Comparison expressions as patterns: Comparison Patterns. -* Compound statements: Actions. -* Computed Regular Expressions: Regexp Usage. -* Concatenation: Concatenation. -* Conditional Patterns: Conditional Patterns. -* Conditional expression: Conditional Exp. -* Constants, types of: Constants. -* Continuing statements on the next line: Statements/Lines. -* Conversion of strings and numbers: Conversion. -* Curly braces: Actions. -* Curly braces: Getting Started. -* Default action: Very Simple. -* Default pattern: Very Simple. -* Deleting elements of arrays: Delete. -* Differences between `gawk' and `awk': Arithmetic Ops. -* Differences between `gawk' and `awk': Constants. -* Documenting `awk' programs: Comments. -* Dynamic Regular Expressions: Regexp Usage. -* Element assignment: Assigning Elements. -* Element of array: Reference to Elements. -* Emacs Lisp: When. -* Empty pattern: Empty. -* Escape sequence notation: Constants. -* Examining fields: Fields. -* Executable Scripts: Executable Scripts. -* Expression, conditional: Conditional Exp. -* Expressions: Actions. -* Expressions, boolean: Boolean Ops. -* Expressions, comparison: Comparison Ops. -* Field separator, `FS': Field Separators. 
-* Field separator, choice of: Field Separators. -* Field separator, setting on command line: Field Separators. -* Field, changing contents of: Changing Fields. -* Fields: Fields. -* Fields, negative-numbered: Non-Constant Fields. -* Fields, semantics of: Field Separators. -* Fields, separating: Field Separators. -* Format specifier: Format-Control. -* Format string: Basic Printf. -* Formatted output: Printf. -* Function call: Function Calls. -* Function definitions: Actions. -* Functions, user-defined: User-defined. -* General input: Reading Files. -* History of `awk': History. -* How gawk works: Two Rules. -* Increment operators: Increment Ops. -* Input file, sample: The Files. -* Input, `getline' function: Getline. -* Input, general: Reading Files. -* Input, multiple line records: Multiple. -* Input, standard: Read Terminal. -* Input, standard: Reading Files. -* Interaction of `awk' with other programs: I/O Functions. -* Invocation of `gawk': Command Line. -* Language, `awk': This Manual. -* Loop: While. -* Loops, breaking out of: Break. -* Lvalue: Assignment Ops. -* Manual, using this: This Manual. -* Metacharacters: Regexp Operators. -* Mod function, semantics of: Arithmetic Ops. -* Modifiers (in format specifiers): Modifiers. -* Multiple line records: Multiple. -* Multiple passes over data: Command Line. -* Multiple statements on one line: Statements/Lines. -* Negative-numbered fields: Non-Constant Fields. -* Number of fields, `NF': Fields. -* Number of records, `FNR': Records. -* Number of records, `NR': Records. -* Numerical constant: Constants. -* Numerical value: Constants. -* One-liners: One-liners. -* Operator, Ternary: Conditional Patterns. -* Operators, `$': Fields. -* Operators, arithmetic: Arithmetic Ops. -* Operators, assignment: Assignment Ops. -* Operators, boolean: Boolean Ops. -* Operators, increment: Increment Ops. -* Operators, regular expression matching: Regexp Usage. -* Operators, relational: Comparison Ops. 
-* Operators, relational: Comparison Patterns. -* Operators, string: Concatenation. -* Operators, string-matching: Regexp Usage. -* Options, Command Line: Command Line. -* Output: Printing. -* Output field separator, `OFS': Output Separators. -* Output record separator, `ORS': Output Separators. -* Output redirection: Redirection. -* Output, formatted: Printf. -* Output, piping: Redirection. -* Passes, Multiple: Command Line. -* Pattern, case sensitive: Read Terminal. -* Pattern, comparison expressions: Comparison Patterns. -* Pattern, default: Very Simple. -* Pattern, definition of: Getting Started. -* Pattern, empty: Empty. -* Pattern, regular expressions: Regexp. -* Patterns, `BEGIN': BEGIN/END. -* Patterns, `END': BEGIN/END. -* Patterns, Conditional: Conditional Patterns. -* Patterns, boolean: Boolean. -* Patterns, definition of: Patterns. -* Patterns, types of: Patterns. -* Pipes for output: Redirection. -* Printing, general: Printing. -* Program, `awk': This Manual. -* Program, Self contained: Executable Scripts. -* Program, definition of: Getting Started. -* Programs, documenting: Comments. -* Range pattern: Ranges. -* Reading files, `getline' function: Getline. -* Reading files, general: Reading Files. -* Reading files, multiple line records: Multiple. -* Record separator, `RS': Records. -* Records, multiple line: Multiple. -* Redirection of output: Redirection. -* Reference to array: Reference to Elements. -* Regexp: Regexp. -* Regular Expressions, Computed: Regexp Usage. -* Regular Expressions, Dynamic: Regexp Usage. -* Regular expression matching operators: Regexp Usage. -* Regular expression, metacharacters: Regexp Operators. -* Regular expressions as patterns: Regexp. -* Regular expressions, field separators and: Field Separators. -* Relational operators: Comparison Patterns. -* Relational operators: Comparison Ops. -* Removing elements of arrays: Delete. -* Rule, definition of: Getting Started. -* Running gawk programs: Running gawk. 
-* Sample input file: The Files. -* Scanning an array: Scanning an Array. -* Script, definition of: Getting Started. -* Scripts, Executable: Executable Scripts. -* Scripts, Shell: Executable Scripts. -* Self contained Programs: Executable Scripts. -* Separator character, choice of: Field Separators. -* Shell Scripts: Executable Scripts. -* Single quotes, why they are needed: One-shot. -* Special variables, user modifiable: User-modified. -* Standard input: Read Terminal. -* Standard input: Reading Files. -* Statements: Statements. -* Statements: Actions. -* String constants: Constants. -* String operators: Concatenation. -* String value: Constants. -* String-matching operators: Regexp Usage. -* Subscripts, multi-dimensional in arrays: Multi-dimensional. -* Ternary Operator: Conditional Patterns. -* Use of comments: Comments. -* User-defined functions: User-defined. -* User-defined variables: Variables. -* Uses of `awk': Preface. -* Using this manual: This Manual. -* Variables, built-in: Variables. -* Variables, user-defined: Variables. -* What is `awk': Preface. -* When to use `awk': When. -* file, `awk' program: Long. -* patterns, range: Ranges. -* program file: Long. -* regexp search operators: Regexp Usage. -* running long programs: Long. 
- - - -Tag Table: -Node: Top918 -Node: Preface2804 -Node: History4267 -Node: License5644 -Node: This Manual18989 -Node: The Files20330 -Node: Getting Started22914 -Node: Very Simple24249 -Node: Two Rules26030 -Node: More Complex28066 -Node: Running gawk30908 -Node: One-shot31827 -Node: Read Terminal32945 -Node: Long33862 -Node: Executable Scripts34991 -Node: Command Line36534 -Node: Comments40168 -Node: Statements/Lines41067 -Node: When43498 -Node: Reading Files45420 -Node: Records47119 -Node: Fields49902 -Node: Non-Constant Fields52789 -Node: Changing Fields54591 -Node: Field Separators57302 -Node: Multiple62004 -Node: Assignment Options64393 -Node: Getline65608 -Node: Close Input74958 -Node: Printing76023 -Node: Print76748 -Node: Print Examples78712 -Node: Output Separators80751 -Node: Redirection82417 -Node: Close Output85886 -Node: Printf88132 -Node: Basic Printf88908 -Node: Format-Control90261 -Node: Modifiers91806 -Node: Printf Examples93108 -Node: One-liners95707 -Node: Patterns97642 -Node: Empty100130 -Node: Regexp100402 -Node: Regexp Usage101173 -Node: Regexp Operators102947 -Node: Comparison Patterns107890 -Node: Ranges109336 -Node: BEGIN/END110722 -Node: Boolean113151 -Node: Conditional Patterns115605 -Node: Actions116105 -Node: Expressions117435 -Node: Constants119124 -Node: Variables121097 -Node: Arithmetic Ops122454 -Node: Concatenation123840 -Node: Comparison Ops124569 -Node: Boolean Ops125973 -Node: Assignment Ops128266 -Node: Increment Ops131817 -Node: Conversion134112 -Node: Conditional Exp136066 -Node: Function Calls137384 -Node: Statements139939 -Node: If141253 -Node: While142627 -Node: Do144232 -Node: For145265 -Node: Break148306 -Node: Continue149848 -Node: Next151476 -Node: Exit152985 -Node: Arrays154514 -Node: Array Intro155624 -Node: Reference to Elements159227 -Node: Assigning Elements161115 -Node: Array Example161615 -Node: Scanning an Array163336 -Node: Delete165642 -Node: Multi-dimensional166529 -Node: Multi-scanning169746 -Node: 
Built-in171303 -Node: Numeric Functions172806 -Node: String Functions176601 -Node: I/O Functions183717 -Node: User-defined185189 -Node: Definition Syntax185834 -Node: Function Example187928 -Node: Function Caveats189034 -Node: Return Statement191386 -Node: Special193612 -Node: User-modified194478 -Node: Auto-set196511 -Node: Sample Program200558 -Node: Notes204316 -Node: Extensions204909 -Node: Future Extensions206490 -Node: Improvements207922 -Node: Manual Improvements210034 -Node: Glossary210928 -Node: Index217934 - -End Tag Table diff --git a/gawk-info-1 b/gawk-info-1 deleted file mode 100644 index b40278a4..00000000 --- a/gawk-info-1 +++ /dev/null @@ -1,1231 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: Top, Next: Preface, Prev: (dir), Up: (dir) - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them; it -contains the following chapters: - -* Menu: - -* Preface:: What you can do with `awk'; brief history - and acknowledgements. 
- -* License:: Your right to copy and distribute `gawk'. - -* This Manual:: Using this manual. - - Includes sample input files that you can use. - -* Getting Started:: A basic introduction to using `awk'. - How to run an `awk' program. Command line syntax. - -* Reading Files:: How to read files and manipulate fields. - -* Printing:: How to print using `awk'. Describes the - `print' and `printf' statements. - Also describes redirection of output. - -* One-liners:: Short, sample `awk' programs. - -* Patterns:: The various types of patterns explained in detail. - -* Actions:: The various types of actions are introduced here. - Describes expressions and the various operators in - detail. Also describes comparison expressions. - -* Statements:: The various control statements are described in - detail. - -* Arrays:: The description and use of arrays. Also includes - array--oriented control statements. - -* User-defined:: User--defined functions are described in detail. - -* Built-in:: The built--in functions are summarized here. - -* Special:: The special variables are summarized here. - -* Sample Program:: A sample `awk' program with a complete explanation. - -* Notes:: Something about the implementation of `gawk'. - -* Glossary:: An explanation of some unfamiliar terms. - -* Index:: - - - -File: gawk-info, Node: Preface, Next: License, Prev: Top, Up: Top - -Preface -******* - -If you are like many computer users, you frequently would like to -make changes in various text files wherever certain patterns appear, -or extract data from parts of certain lines while discarding the -rest. To write a program to do this in a language such as C or -Pascal is a time--consuming inconvenience that may take many lines of -code. The job may be easier with `awk'. - -The `awk' utility interprets a special--purpose programming language -that makes it possible to handle simple data--reformatting jobs -easily with just a few lines of code. 
- -The GNU implementation of `awk' is called `gawk'; it is fully upward -compatible with the System V Release 3.1 and later versions of `awk'. -All properly written `awk' programs should work with `gawk'. So we -usually don't distinguish between `gawk' and other `awk' -implementations in this manual. - -This manual teaches you what `awk' does and how you can use `awk' -effectively. You should already be familiar with basic, -general--purpose, operating system commands such as `ls'. Using -`awk' you can: - - * manage small, personal databases, - - * generate reports, - - * validate data, - - * produce indexes, and perform other document preparation tasks, - - * even experiment with algorithms that can be adapted later to - other computer languages! - -* Menu: - -* History:: The history of gawk and awk. Acknowledgements. - - - -File: gawk-info, Node: History, Up: Preface - -History of `awk' and `gawk' -=========================== - -The name `awk' comes from the initials of its designers: Alfred V. -Aho, Peter J. Weinberger, and Brian W. Kernighan. The original -version of `awk' was written in 1977. In 1985 a new version made the -programming language more powerful, introducing user--defined -functions, multiple input streams, and computed regular expressions. - -The GNU implementation, `gawk', was written in 1986 by Paul Rubin and -Jay Fenlason, with advice from Richard Stallman. John Woods -contributed parts of the code as well. In 1988, David Trueman, with -help from Arnold Robbins, reworked `gawk' for compatibility with the -newer `awk'. - -Many people need to be thanked for their assistance in producing this -manual. Jay Fenlason contributed many ideas and sample programs. -Richard Mlynarik and Robert Chassell gave helpful comments on drafts -of this manual. The paper ``A Supplemental Document for `awk''' by -John W. 
Pierce of the Chemistry Department at UC San Diego, -pinpointed several issues relevant both to `awk' implementation and -to this manual, that would otherwise have escaped us. - -Finally, we would like to thank Brian Kernighan of Bell Labs for -invaluable assistance during the testing and debugging of `gawk', and -for help in clarifying several points about the language. - - - -File: gawk-info, Node: License, Next: This Manual, Prev: Preface, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 1, February 1989 - - Copyright (C) 1989 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble -========= - - The license agreements of most software companies try to keep users -at the mercy of those companies. By contrast, our General Public -License is intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. -The General Public License applies to the Free Software Foundation's -software and to any other program whose authors commit to using it. -You can use it for your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Specifically, the General Public License is designed to make -sure that you have the freedom to give away or sell copies of free -software, that you receive source code or can get it if you want it, -that you can change the software or use pieces of it in new free -programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if -you distribute copies of the software, or if you modify it. 
- - For example, if you distribute copies of a such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must tell them their rights. - - We protect your rights with two steps: (1) copyright the software, -and (2) offer you this license which gives you legal permission to -copy, distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, -we want its recipients to know that what they have is not the -original, so that any problems introduced by others will not reflect -on the original authors' reputations. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 1. This License Agreement applies to any program or other work - which contains a notice placed by the copyright holder saying it - may be distributed under the terms of this General Public - License. The ``Program'', below, refers to any such program or - work, and a ``work based on the Program'' means either the - Program or any work containing the Program or a portion of it, - either verbatim or with modifications. Each licensee is - addressed as ``you''. - - 2. You may copy and distribute verbatim copies of the Program's - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an - appropriate copyright notice and disclaimer of warranty; keep - intact all the notices that refer to this General Public License - and to the absence of any warranty; and give any other - recipients of the Program a copy of this General Public License - along with the Program. You may charge a fee for the physical - act of transferring a copy. - - 3. 
You may modify your copy or copies of the Program or any portion - of it, and copy and distribute such modifications under the - terms of Paragraph 1 above, provided that you also do the - following: - - * cause the modified files to carry prominent notices stating - that you changed the files and the date of any change; and - - * cause the whole of any work that you distribute or publish, - that in whole or in part contains the Program or any part - thereof, either with or without modifications, to be - licensed at no charge to all third parties under the terms - of this General Public License (except that you may choose - to grant warranty protection to some or all third parties, - at your option). - - * If the modified program normally reads commands - interactively when run, you must cause it, when started - running for such interactive use in the simplest and most - usual way, to print or display an announcement including an - appropriate copyright notice and a notice that there is no - warranty (or else, saying that you provide a warranty) and - that users may redistribute the program under these - conditions, and telling the user how to view a copy of this - General Public License. - - * You may charge a fee for the physical act of transferring a - copy, and you may at your option offer warranty protection - in exchange for a fee. - - Mere aggregation of another independent work with the Program - (or its derivative) on a volume of a storage or distribution - medium does not bring the other work under the scope of these - terms. - - 4. 
You may copy and distribute the Program (or a portion or - derivative of it, under Paragraph 2) in object code or - executable form under the terms of Paragraphs 1 and 2 above - provided that you also do one of the following: - - * accompany it with the complete corresponding - machine-readable source code, which must be distributed - under the terms of Paragraphs 1 and 2 above; or, - - * accompany it with a written offer, valid for at least three - years, to give any third party free (except for a nominal - charge for the cost of distribution) a complete - machine-readable copy of the corresponding source code, to - be distributed under the terms of Paragraphs 1 and 2 above; - or, - - * accompany it with the information you received as to where - the corresponding source code may be obtained. (This - alternative is allowed only for noncommercial distribution - and only if you received the program in object code or - executable form alone.) - - Source code for a work means the preferred form of the work for - making modifications to it. For an executable file, complete - source code means all the source code for all modules it - contains; but, as a special exception, it need not include - source code for modules which are standard libraries that - accompany the operating system on which the executable file - runs, or for standard header files or definitions files that - accompany that operating system. - - 5. You may not copy, modify, sublicense, distribute or transfer the - Program except as expressly provided under this General Public - License. Any attempt otherwise to copy, modify, sublicense, - distribute or transfer the Program is void, and will - automatically terminate your rights to use the Program under - this License. However, parties who have received copies, or - rights to use copies, from you under this General Public License - will not have their licenses terminated so long as such parties - remain in full compliance. - - 6. 
By copying, distributing or modifying the Program (or any work - based on the Program) you indicate your acceptance of this - license to do so, and all its terms and conditions. - - 7. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from - the original licensor to copy, distribute or modify the Program - subject to these terms and conditions. You may not impose any - further restrictions on the recipients' exercise of the rights - granted herein. - - 8. The Free Software Foundation may publish revised and/or new - versions of the General Public License from time to time. Such - new versions will be similar in spirit to the present version, - but may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the - Program specifies a version number of the license which applies - to it and ``any later version'', you have the option of - following the terms and conditions either of that version or of - any later version published by the Free Software Foundation. If - the Program does not specify a version number of the license, - you may choose any version ever published by the Free Software - Foundation. - - 9. If you wish to incorporate parts of the Program into other free - programs whose distribution conditions are different, write to - the author to ask for permission. For software which is - copyrighted by the Free Software Foundation, write to the Free - Software Foundation; we sometimes make exceptions for this. Our - decision will be guided by the two goals of preserving the free - status of all derivatives of our free software and of promoting - the sharing and reuse of software generally. - - NO WARRANTY - - 10. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO - WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE - LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM ``AS IS'' - WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, - INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE - ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS - WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE - COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 11. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY - MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE - LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, - INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR - INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS - OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY - YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH - ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - -Appendix: How to Apply These Terms to Your New Programs -======================================================= - - If you develop a new program, and you want it to be of the greatest -possible use to humanity, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. 
- Copyright (C) 19YY NAME OF AUTHOR - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Also add information on how to contact you by electronic and paper -mail. - -If the program is interactive, make it output a short notice like -this when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - - The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and -`show c'; they could even be mouse-clicks or menu items--whatever -suits your program. - -You should also get your employer (if you work as a programmer) or -your school, if any, to sign a ``copyright disclaimer'' for the -program, if necessary. Here a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - program `Gnomovision' (a program to direct compilers to make passes - at assemblers) written by James Hacker. - - SIGNATURE OF TY COON, 1 April 1989 - Ty Coon, President of Vice - -That's all there is to it! 
- - - -File: gawk-info, Node: This Manual, Next: Getting Started, Prev: License, Up: Top - -Using This Manual -***************** - -The term `gawk' refers to a program (a version of `awk') developed by -the Free Software Foundation, and to the language you use to tell it -what to do. When we need to be careful, we call the program ``the -`awk' utility'' and the language ``the `awk' language''. The purpose -of this manual is to explain the `awk' language and how to run the -`awk' utility. - -The term "`awk' program" refers to a program written by you in the -`awk' programming language. - -*Note Getting Started::, for the bare essentials you need to know to -start using `awk'. - -Useful ``one--liners'' are included to give you a feel for the `awk' -language (*note One-liners::.). - -A sizable sample `awk' program has been provided for you (*note -Sample Program::.). - -If you find terms that you aren't familiar with, try looking them up -in the glossary (*note Glossary::.). - -Most of the time complete `awk' programs are used as examples, but in -some of the more advanced sections, only the part of the `awk' -program that illustrates the concept being described is shown. - -* Menu: - -This chapter contains the following sections: - -* The Files:: Sample data files for use in the `awk' programs - illustrated in this manual. - - - -File: gawk-info, Node: The Files, Up: This Manual - -Input Files for the Examples -============================ - -This manual contains many sample programs. The data for many of -those programs comes from two files. The first file, called -`BBS-list', represents a list of computer bulletin board systems and -information about those systems. - -Each line of this file is one "record". Each record contains the -name of a computer bulletin board, its phone number, the board's baud -rate, and a code for the number of hours it is operational. An `A' -in the last column means the board operates 24 hours all week. 
A `B' -in the last column means the board operates evening and weekend -hours, only. A `C' means the board operates only on weekends. - - aardvark 555-5553 1200/300 B - alpo-net 555-3412 2400/1200/300 A - barfly 555-7685 1200/300 A - bites 555-1675 2400/1200/300 A - camelot 555-0542 300 C - core 555-2912 1200/300 C - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sdace 555-3430 2400/1200/300 A - sabafoo 555-2127 1200/300 C - -The second data file, called `inventory-shipped', represents -information about shipments during the year. Each line of this file -is also one record. Each record contains the month of the year, the -number of green crates shipped, the number of red boxes shipped, the -number of orange bags shipped, and the number of blue packages -shipped, respectively. - - Jan 13 25 15 115 - Feb 15 32 24 226 - Mar 15 24 34 228 - Apr 31 52 63 420 - May 16 34 29 208 - Jun 31 42 75 492 - Jul 24 34 67 436 - Aug 15 34 47 316 - Sep 13 55 37 277 - Oct 29 54 68 525 - Nov 20 87 82 577 - Dec 17 35 61 401 - - Jan 21 36 64 620 - Feb 26 58 80 652 - Mar 24 75 70 495 - Apr 21 70 74 514 - -If you are reading this in GNU Emacs using Info, you can copy the -regions of text showing these sample files into your own test files. -This way you can try out the examples shown in the remainder of this -document. You do this by using the command `M-x write-region' to -copy text from the Info file into a file for use with `awk' (see your -``GNU Emacs Manual'' for more information). Using this information, -create your own `BBS-list' and `inventory-shipped' files, and -practice what you learn in this manual. - - - -File: gawk-info, Node: Getting Started, Next: Reading Files, Prev: This Manual, Up: Top - -Getting Started With `awk' -************************** - -The basic function of `awk' is to search files for lines (or other -units of text) that contain certain patterns. 
When a line matching -any of those patterns is found, `awk' performs specified actions on -that line. Then `awk' keeps processing input lines until the end of -the file is reached. - -An `awk' "program" or "script" consists of a series of "rules". -(They may also contain "function definitions", but that is an -advanced feature, so let's ignore it for now. *Note User-defined::.) - -A rule contains a "pattern", an "action", or both. Actions are -enclosed in curly braces to distinguish them from patterns. -Therefore, an `awk' program is a sequence of rules in the form: - - PATTERN { ACTION } - PATTERN { ACTION } - ... - - * Menu: - -* Very Simple:: A very simple example. -* Two Rules:: A less simple one--line example with two rules. -* More Complex:: A more complex example. -* Running gawk:: How to run gawk programs; includes command line syntax. -* Comments:: Adding documentation to gawk programs. -* Statements/Lines:: Subdividing or combining statements into lines. - -* When:: When to use gawk and when to use other things. - - - -File: gawk-info, Node: Very Simple, Next: Two Rules, Up: Getting Started - -A Very Simple Example -===================== - -The following command runs a simple `awk' program that searches the -input file `BBS-list' for the string of characters: `foo'. (A string -of characters is usually called, quite simply, a "string".) - - awk '/foo/ { print $0 }' BBS-list - -When lines containing `foo' are found, they are printed, because -`print $0' means print the current line. (Just `print' by itself -also means the same thing, so we could have written that instead.) - -You will notice that slashes, `/', surround the string `foo' in the -actual `awk' program. The slashes indicate that `foo' is a pattern -to search for. This type of pattern is called a "regular -expression", and is covered in more detail later (*note Regexp::.). -There are single quotes around the `awk' program so that the shell -won't interpret any of it as special shell characters. 
- -Here is what this program prints: - - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sabafoo 555-2127 1200/300 C - -In an `awk' rule, either the pattern or the action can be omitted, -but not both. - -If the pattern is omitted, then the action is performed for *every* -input line. - -If the action is omitted, the default action is to print all lines -that match the pattern. We could leave out the action (the print -statement and the curly braces) in the above example, and the result -would be the same: all lines matching the pattern `foo' would be -printed. (By comparison, omitting the print statement but retaining -the curly braces makes an empty action that does nothing; then no -lines would be printed.) - - - -File: gawk-info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up: Getting Started - -An Example with Two Rules -========================= - -The `awk' utility reads the input files one line at a time. For each -line, `awk' tries the patterns of all the rules. If several patterns -match then several actions are run, in the order in which they appear -in the `awk' program. If no patterns match, then no actions are run. - -After processing all the rules (perhaps none) that match the line, -`awk' reads the next line (however, *note Next::.). This continues -until the end of the file is reached. - -For example, the `awk' program: - - /12/ { print $0 } - /21/ { print $0 } - -contains two rules. The first rule has the string `12' as the -pattern and `print $0' as the action. The second rule has the string -`21' as the pattern and also has `print $0' as the action. Each -rule's action is enclosed in its own pair of braces. - -This `awk' program prints every line that contains the string `12' -*or* the string `21'. If a line contains both strings, it is printed -twice, once by each rule. 
- -If we run this program on our two sample data files, `BBS-list' and -`inventory-shipped', as shown here: - - awk '/12/ { print $0 } - /21/ { print $0 }' BBS-list inventory-shipped - -we get the following output: - - aardvark 555-5553 1200/300 B - alpo-net 555-3412 2400/1200/300 A - barfly 555-7685 1200/300 A - bites 555-1675 2400/1200/300 A - core 555-2912 1200/300 C - fooey 555-1234 2400/1200/300 B - foot 555-6699 1200/300 B - macfoo 555-6480 1200/300 A - sdace 555-3430 2400/1200/300 A - sabafoo 555-2127 1200/300 C - sabafoo 555-2127 1200/300 C - Jan 21 36 64 620 - Apr 21 70 74 514 - -Note how the line in `BBS-list' beginning with `sabafoo' was printed -twice, once for each rule. - - - -File: gawk-info, Node: More Complex, Next: Running gawk, Prev: Two Rules, Up: Getting Started - -A More Complex Example -====================== - -Here is an example to give you an idea of what typical `awk' programs -do. This example shows how `awk' can be used to summarize, select, -and rearrange the output of another utility. It uses features that -haven't been covered yet, so don't worry if you don't understand all -the details. - - ls -l | awk '$5 == "Nov" { sum += $4 } - END { print sum }' - -This command prints the total number of bytes in all the files in the -current directory that were last modified in November (of any year). -(In the C shell you would need to type a semicolon and then a -backslash at the end of the first line; in the Bourne shell you can -type the example as shown.) - -The `ls -l' part of this example is a command that gives you a full -listing of all the files in a directory, including file size and date. 
-Its output looks like this: - - -rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile - -rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h - -rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h - -rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y - -rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c - -rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c - -rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c - -rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c - -The first field contains read--write permissions, the second field -contains the number of links to the file, and the third field -identifies the owner of the file. The fourth field contains the size -of the file in bytes. The fifth, sixth, and seventh fields contain -the month, day, and time, respectively, that the file was last -modified. Finally, the eighth field contains the name of the file. - -The `$5 == "Nov"' in our `awk' program is an expression that tests -whether the fifth field of the output from `ls -l' matches the string -`Nov'. Each time a line has the string `Nov' in its fifth field, the -action `{ sum += $4 }' is performed. This adds the fourth field (the -file size) to the variable `sum'. As a result, when `awk' has -finished reading all the input lines, `sum' will be the sum of the -sizes of files whose lines matched the pattern. - -After the last line of output from `ls' has been processed, the `END' -pattern is executed, and the value of `sum' is printed. In this -example, the value of `sum' would be 80600. - -These more advanced `awk' techniques are covered in later sections -(*note Actions::.). Before you can move on to more advanced `awk' -programming, you have to know how `awk' interprets your input and -displays your output. By manipulating "fields" and using special -"print" statements, you can produce some very useful and spectacular -looking reports. 
- - - -File: gawk-info, Node: Running gawk, Next: Comments, Prev: More Complex, Up: Getting Started - -How to Run `awk' Programs -========================= - -There are several ways to run an `awk' program. If the program is -short, it is easiest to include it in the command that runs `awk', -like this: - - awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ... - - where PROGRAM consists of a series of PATTERNS and ACTIONS, as -described earlier. - -When the program is long, you would probably prefer to put it in a -file and run it with a command like this: - - awk -f PROGRAM-FILE INPUT-FILE1 INPUT-FILE2 ... - - * Menu: - -* One-shot:: Running a short throw--away `awk' program. -* Read Terminal:: Using no input files (input from terminal instead). -* Long:: Putting permanent `awk' programs in files. -* Executable Scripts:: Making self--contained `awk' programs. -* Command Line:: How the `awk' command line is laid out. - - - -File: gawk-info, Node: One-shot, Next: Read Terminal, Up: Running gawk - -One--shot Throw--away `awk' Programs ------------------------------------- - -Once you are familiar with `awk', you will often type simple programs -at the moment you want to use them. Then you can write the program -as the first argument of the `awk' command, like this: - - awk 'PROGRAM' INPUT-FILE1 INPUT-FILE2 ... - - where PROGRAM consists of a series of PATTERNS and ACTIONS, as -described earlier. - -This command format tells the shell to start `awk' and use the -PROGRAM to process records in the input file(s). There are single -quotes around the PROGRAM so that the shell doesn't interpret any -`awk' characters as special shell characters. They cause the shell -to treat all of PROGRAM as a single argument for `awk'. They also -allow PROGRAM to be more than one line long. - -This format is also useful for running short or medium--sized `awk' -programs from shell scripts, because it avoids the need for a -separate file for the `awk' program. 
A self--contained shell script -is more reliable since there are no other files to misplace. - - - -File: gawk-info, Node: Read Terminal, Next: Long, Prev: One-shot, Up: Running gawk - -Running `awk' without Input Files ---------------------------------- - -You can also use `awk' without any input files. If you type the -command line: - - awk 'PROGRAM' - -then `awk' applies the PROGRAM to the "standard input", which usually -means whatever you type on the terminal. This continues until you -indicate end--of--file by typing `Control-d'. - -For example, if you type: - - awk '/th/' - -whatever you type next will be taken as data for that `awk' program. -If you go on to type the following data, - - Kathy - Ben - Tom - Beth - Seth - Karen - Thomas - `Control-d' - -then `awk' will print - - Kathy - Beth - Seth - -as matching the pattern `th'. Notice that it did not recognize -`Thomas' as matching the pattern. The `awk' language is "case -sensitive", and matches patterns *exactly*. - - - -File: gawk-info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk - -Running Long Programs ---------------------- - -Sometimes your `awk' programs can be very long. In this case it is -more convenient to put the program into a separate file. To tell -`awk' to use that file for its program, you type: - - awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ... - - The `-f' tells the `awk' utility to get the `awk' program from the -file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For -example, you could put the program: - - /th/ - -into the file `th-prog'. Then the command: - - awk -f th-prog - -does the same thing as this one: - - awk '/th/' - -which was explained earlier (*note Read Terminal::.). Note that you -don't usually need single quotes around the file name that you -specify with `-f', because most file names don't contain any of the -shell's special characters. 
- -If you want to identify your `awk' program files clearly as such, you -can add the extension `.awk' to the filename. This doesn't affect -the execution of the `awk' program, but it does make ``housekeeping'' -easier. - - - -File: gawk-info, Node: Executable Scripts, Next: Command Line, Prev: Long, Up: Running gawk - -Executable `awk' Programs -------------------------- - -(The following section assumes that you are already somewhat familiar -with `awk'.) - -Once you have learned `awk', you may want to write self--contained -`awk' scripts, using the `#!' script mechanism. You can do this on -BSD Unix systems and GNU. - -For example, you could create a text file named `hello', containing -the following (where `BEGIN' is a feature we have not yet discussed): - - #! /bin/awk -f - - # a sample awk program - - BEGIN { print "hello, world" } - -After making this file executable (with the `chmod' command), you can -simply type: - - hello - -at the shell, and the system will arrange to run `awk' as if you had -typed: - - awk -f hello - -Self--contained `awk' scripts are particularly useful for putting -`awk' programs into production on your system, without your users -having to know that they are actually using an `awk' program. - -If your system does not support the `#!' mechanism, you can get a -similar effect using a regular shell script. It would look something -like this: - - : a sample awk program - - awk 'PROGRAM' "$@" - -Using this technique, it is *vital* to enclose the PROGRAM in single -quotes to protect it from interpretation by the shell. If you omit -the quotes, only a shell wizard can predict the result. - -The `"$@"' causes the shell to forward all the command line arguments -to the `awk' program, without interpretation. - - - -File: gawk-info, Node: Command Line, Prev: Executable Scripts, Up: Running gawk - -Details of the `awk' Command Line ---------------------------------- - -(The following section assumes that you are already familiar with -`awk'.) 
- -There are two ways to run `awk'. Here are templates for both of -them; items enclosed in `[' and `]' in these templates are optional. - - awk [ -FFS ] [ -- ] 'PROGRAM' FILE ... - awk [ -FFS ] -f SOURCE-FILE [ -f SOURCE-FILE ... ] [ -- ] FILE ... - - Options begin with a minus sign, and consist of a single character. -The options and their meanings are as follows: - -`-FFS' - This sets the `FS' variable to FS (*note Special::.). As a - special case, if FS is `t', then `FS' will be set to the tab - character (`"\t"'). - -`-f SOURCE-FILE' - Indicates that the `awk' program is to be found in SOURCE-FILE - instead of in the first non--option argument. - -`--' - This signals the end of the command line options. If you wish - to specify an input file named `-f', you can precede it with the - `--' argument to prevent the `-f' from being interpreted as an - option. This handling of `--' follows the POSIX argument - parsing conventions. - -Any other options will be flagged as invalid with a warning message, -but are otherwise ignored. - -If the `-f' option is *not* used, then the first non--option command -line argument is expected to be the program text. - -The `-f' option may be used more than once on the command line. -`awk' will read its program source from all of the named files, as if -they had been concatenated together into one big file. This is -useful for creating libraries of `awk' functions. Useful functions -can be written once, and then retrieved from a standard place, -instead of having to be included into each individual program. You -can still type in a program at the terminal and use library -functions, by specifying `/dev/tty' as one of the arguments to a -`-f'. Type your program, and end it with the keyboard end--of--file -character `Control-d'. - -Any additional arguments on the command line are made available to -your `awk' program in the `ARGV' array (*note Special::.). 
These -arguments are normally treated as input files to be processed in the -order specified. However, an argument that has the form VAR`='VALUE, -means to assign the value VALUE to the variable VAR--it does not -specify a file at all. - -Command line options and the program text (if present) are omitted -from the `ARGV' array. All other arguments, including variable -assignments, are included (*note Special::.). - -The distinction between file name arguments and variable--assignment -arguments is made when `awk' is about to open the next input file. -At that point in execution, it checks the ``file name'' to see -whether it is really a variable assignment; if so, instead of trying -to read a file it will, *at that point in the execution*, assign the -variable. - -Therefore, the variables actually receive the specified values after -all previously specified files have been read. In particular, the -values of variables assigned in this fashion are *not* available -inside a `BEGIN' rule (*note BEGIN/END::.), since such rules are run -before `awk' begins scanning the argument list. - -The variable assignment feature is most useful for assigning to -variables such as `RS', `OFS', and `ORS', which control input and -output formats, before listing the data files. It is also useful for -controlling state if multiple passes are needed over a data file. -For example: - - awk 'pass == 1 { PASS 1 STUFF } - pass == 2 { PASS 2 STUFF }' pass=1 datafile pass=2 datafile - - - -File: gawk-info, Node: Comments, Next: Statements/Lines, Prev: Running gawk, Up: Getting Started - -Comments in `awk' Programs -========================== - -When you write a complicated `awk' program, you can put "comments" in -the program file to help you remember what the program does, and how -it works. - -A comment starts with the sharp sign character, `#', and -continues to the end of the line. The `awk' language ignores the -rest of a line following a sharp sign.
For example, we could have -put the following into `th-prog': - - # This program finds records containing the pattern `th'. This is how - # you continue comments on additional lines. - /th/ - -You can put comment lines into keyboard--composed throw--away `awk' -programs also, but this usually isn't very useful; the purpose of a -comment is to help yourself or another person understand the program -at another time. - - - -File: gawk-info, Node: Statements/Lines, Next: When, Prev: Comments, Up: Getting Started - -`awk' Statements versus Lines -============================= - -Most often, each line in an `awk' program is a separate statement or -separate rule, like this: - - awk '/12/ { print $0 } - /21/ { print $0 }' BBS-list inventory-shipped - -But sometimes statements can be more than one line, and lines can -contain several statements. - -You can split a statement into multiple lines by inserting a newline -after any of the following: - - , { ? : || && - -Lines ending in `do' or `else' automatically have their statements -continued on the following line(s). A newline at any other point -ends the statement. - -If you would like to split a single statement into two lines at a -point where a newline would terminate it, you can "continue" it by -ending the first line with a backslash character, `\'. This is -allowed absolutely anywhere in the statement, even in the middle of a -string or regular expression. For example: - - awk '/This program is too long, so continue it\ - on the next line/ { print $1 }' - -We have generally not used backslash continuation in the sample -programs in this manual. Since there is no limit on the length of a -line, it is never strictly necessary; it just makes programs -prettier. We have preferred to make them even more pretty by keeping -the statements short. Backslash continuation is most useful when -your `awk' program is in a separate source file, instead of typed in -on the command line. 
- -*Warning: this does not work if you are using the C shell.* -Continuation with backslash works for `awk' programs in files, and -also for one--shot programs *provided* you are using the Bourne -shell, the Korn shell, or the Bourne--again shell. But the C shell -used on Berkeley Unix behaves differently! There, you must use two -backslashes in a row, followed by a newline. - -When `awk' statements within one rule are short, you might want to -put more than one of them on a line. You do this by separating the -statements with semicolons, `;'. This also applies to the rules -themselves. Thus, the above example program could have been written: - - /12/ { print $0 } ; /21/ { print $0 } - -*Note:* It is a new requirement that rules on the same line require -semicolons as a separator in the `awk' language; it was done for -consistency with the statements in the action part of rules. - - - -File: gawk-info, Node: When, Prev: Statements/Lines, Up: Getting Started - -When to Use `awk' -================= - -What use is all of this to me, you might ask? Using additional -operating system utilities, more advanced patterns, field separators, -arithmetic statements, and other selection criteria, you can produce -much more complex output. The `awk' language is very useful for -producing reports from large amounts of raw data, like summarizing -information from the output of standard operating system programs -such as `ls'. (*Note A More Complex Example: More Complex.) - -Programs written with `awk' are usually much smaller than they would -be in other languages. This makes `awk' programs easy to compose and -use. Often `awk' programs can be quickly composed at your terminal, -used once, and thrown away. Since `awk' programs are interpreted, -you can avoid the usually lengthy edit--compile--test--debug cycle of -software development. - -Complex programs have been written in `awk', including a complete -retargetable assembler for 8--bit microprocessors (*note Glossary::. 
-for more information) and a microcode assembler for a special purpose -Prolog computer. However, `awk''s capabilities are strained by tasks -of such complexity. - -If you find yourself writing `awk' scripts of more than, say, a few -hundred lines, you might consider using a different programming -language. Emacs Lisp is a good choice if you need sophisticated -string or pattern matching capabilities. The shell is also good at -string and pattern matching; in addition it allows powerful use of -the standard utilities. More conventional languages like C, C++, or -Lisp offer better facilities for system programming and for managing -the complexity of large programs. Programs in these languages may -require more lines of source code than the equivalent `awk' programs, -but they will be easier to maintain and usually run more efficiently. - - - -File: gawk-info, Node: Reading Files, Next: Printing, Prev: Getting Started, Up: Top - -Reading Files (Input) -********************* - -In the typical `awk' program, all input is read either from the -standard input (usually the keyboard) or from files whose names you -specify on the `awk' command line. If you specify input files, `awk' -reads data from the first one until it reaches the end; then it reads -the second file until it reaches the end, and so on. The name of the -current input file can be found in the special variable `FILENAME' -(*note Special::.). - -The input is split automatically into "records", and processed by the -rules one record at a time. (Records are the units of text mentioned -in the introduction; by default, a record is a line of text.) Each -record read is split automatically into "fields", to make it more -convenient for a rule to work on parts of the record under -consideration. - -On rare occasions you will need to use the `getline' command, which -can do explicit input from any number of files. - -* Menu: - -* Records:: Controlling how data is split into records. 
-* Fields:: An introduction to fields. -* Field Separators:: The field separator and how to change it. -* Multiple:: Reading multi--line records. - -* Assignment Options:: Setting variables on the command line and a summary - of command line syntax. This is an advanced method - of input. - -* Getline:: Reading files under explicit program control - using the `getline' function. -* Close Input:: Closing an input file (so you can read from - the beginning once more). - - - -File: gawk-info, Node: Records, Next: Fields, Up: Reading Files - -How Input is Split into Records -=============================== - -The `awk' language divides its input into records and fields. -Records are separated from each other by the "record separator". By -default, the record separator is the "newline" character. Therefore, -normally, a record is a line of text. - -Sometimes you may want to use a different character to separate your -records. You can use different characters by changing the special -variable `RS'. - -The value of `RS' is a string that says how to separate records; the -default value is `"\n"', the string of just a newline character. -This is why lines of text are the default record. Although `RS' can -have any string as its value, only the first character of the string -will be used as the record separator. The other characters are -ignored. `RS' is exceptional in this regard; `awk' uses the full -value of all its other special variables. - -The value of `RS' is changed by "assigning" it a new value (*note -Assignment Ops::.). One way to do this is at the beginning of your -`awk' program, before any input has been processed, using the special -`BEGIN' pattern (*note BEGIN/END::.). This way, `RS' is changed to -its new value before any input is read. The new value of `RS' is -enclosed in quotation marks. For example: - - awk 'BEGIN { RS = "/" } ; { print $0 }' BBS-list - -changes the value of `RS' to `/', the slash character, before reading -any input. 
Records are now separated by a slash. The second rule in -the `awk' program (the action with no pattern) will proceed to print -each record. Since each `print' statement adds a newline at the end -of its output, the effect of this `awk' program is to copy the input -with each slash changed to a newline. - -Another way to change the record separator is on the command line, -using the variable--assignment feature (*note Command Line::.). - - awk '...' RS="/" SOURCE-FILE - -`RS' will be set to `/' before processing SOURCE-FILE. - -The empty string (a string of no characters) has a special meaning as -the value of `RS': it means that records are separated only by blank -lines. *Note Multiple::, for more details. - -The `awk' utility keeps track of the number of records that have been -read so far from the current input file. This value is stored in a -special variable called `FNR'. It is reset to zero when a new file -is started. Another variable, `NR', is the total number of input -records read so far from all files. It starts at zero but is never -automatically reset to zero. - -If you change the value of `RS' in the middle of an `awk' run, the -new value is used to delimit subsequent records, but the record -currently being processed (and records already finished) are not -affected. - - diff --git a/gawk-info-2 b/gawk-info-2 deleted file mode 100644 index a228c5b9..00000000 --- a/gawk-info-2 +++ /dev/null @@ -1,1265 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: Fields, Next: Non-Constant Fields, Prev: Records, Up: Reading Files - -Examining Fields -================ - -When `awk' reads an input record, the record is automatically -separated or "parsed" by the interpreter into pieces called "fields". -By default, fields are separated by whitespace, like words in a line. -Whitespace in `awk' means any string of one or more spaces and/or -tabs; other characters such as newline, formfeed, and so on, that are -considered whitespace by other languages are *not* considered -whitespace by `awk'. - -The purpose of fields is to make it more convenient for you to refer -to these pieces of the record. You don't have to use them--you can -operate on the whole record if you wish--but fields are what make -simple `awk' programs so powerful. - -To refer to a field in an `awk' program, you use a dollar--sign, `$', -followed by the number of the field you want. Thus, `$1' refers to -the first field, `$2' to the second, and so on. For example, suppose -the following is a line of input: - - This seems like a pretty nice example. - - Here the first field, or `$1', is `This'; the second field, or `$2', -is `seems'; and so on. Note that the last field, `$7', is -`example.'. Because there is no space between the `e' and the `.', -the period is considered part of the seventh field. - -No matter how many fields there are, the last field in a record can -be represented by `$NF'. 
So, in the example above, `$NF' would be -the same as `$7', which is `example.'. Why this works is explained -below (*note Non-Constant Fields::.). If you try to refer to a field -beyond the last one, such as `$8' when the record has only 7 fields, -you get the empty string. - -Plain `NF', with no `$', is a special variable whose value is the -number of fields in the current record. - -`$0', which looks like an attempt to refer to the zeroth field, is a -special case: it represents the whole input record. This is what you -would use when you aren't interested in fields. - -Here are some more examples: - - awk '$1 ~ /foo/ { print $0 }' BBS-list - -This example contains the "matching" operator `~' (*note Comparison -Ops::.). Using this operator, all records in the file `BBS-list' -whose first field contains the string `foo' are printed. - -By contrast, the following example: - - awk '/foo/ { print $1, $NF }' BBS-list - -looks for the string `foo' in *the entire record* and prints the -first field and the last field for each input record containing the -pattern. - -The following program will search the system password file, and print -the entries for users who have no password. - - awk -F: '$2 == ""' /etc/passwd - -This program uses the `-F' option on the command line to set the field -separator. (Fields in `/etc/passwd' are separated by colons. The -second field represents a user's encrypted password, but if the field -is empty, that user has no password.) - - - -File: gawk-info, Node: Non-Constant Fields, Next: Changing Fields, Prev: Fields, Up: Reading Files - -Non-constant Field Numbers -========================== - -The number of a field does not need to be a constant. Any expression -in the `awk' language can be used after a `$' to refer to a field. -The `awk' utility evaluates the expression and uses the "numeric -value" as a field number.
Consider this example: - - awk '{ print $NR }' - -Recall that `NR' is the number of records read so far: 1 in the first -record, 2 in the second, etc. So this example will print the first -field of the first record, the second field of the second record, and -so on. For the twentieth record, field number 20 will be printed; -most likely this will make a blank line, because the record will not -have 20 fields. - -Here is another example of using expressions as field numbers: - - awk '{ print $(2*2) }' BBS-list - -The `awk' language must evaluate the expression `(2*2)' and use its -value as the field number to print. The `*' sign represents -multiplication, so the expression `2*2' evaluates to 4. This -example, then, prints the hours of operation (the fourth field) for -every line of the file `BBS-list'. - -When you use non--constant field numbers, you may ask for a field -with a negative number. This always results in an empty string, just -like a field whose number is too large for the input record. For -example, `$(1-4)' would try to examine field number -3; it would -result in an empty string. - -If the field number you compute is zero, you get the entire record. - -The number of fields in the current record is stored in the special -variable `NF' (*note Special::.). The expression `$NF' is not a -special feature: it is the direct consequence of evaluating `NF' and -using its value as a field number. - - - -File: gawk-info, Node: Changing Fields, Next: Field Separators, Prev: Non-Constant Fields, Up: Reading Files - -Changing the Contents of a Field -================================ - -You can change the contents of a field as seen by `awk' within an -`awk' program; this changes what `awk' perceives as the current input -record. (The actual input is untouched: `awk' never modifies the -input file.) 
- -Look at this example: - - awk '{ $3 = $2 - 10; print $2, $3 }' inventory-shipped - -The `-' sign represents subtraction, so this program reassigns field -three, `$3', to be the value of field two minus ten, ``$2' - 10'. -(*Note Arithmetic Ops::.) Then field two, and the new value for -field three, are printed. - -In order for this to work, the text in field `$2' must make sense as -a number; the string of characters must be converted to a number in -order for the computer to do arithmetic on it. The number resulting -from the subtraction is converted back to a string of characters -which then becomes field 3. *Note Conversion::. - -When you change the value of a field (as perceived by `awk'), the -text of the input record is recalculated to contain the new field -where the old one was. `$0' will from that time on reflect the -altered field. Thus, - - awk '{ $2 = $2 - 10; print $0 }' inventory-shipped - -will print a copy of the input file, with 10 subtracted from the -second field of each line. - -You can also assign contents to fields that are out of range. For -example: - - awk '{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 }' inventory-shipped - -We've just created `$6', whose value is the average of fields `$2', -`$3', `$4', and `$5'. The `+' sign represents addition, and the `/' -sign represents division. For the file `inventory-shipped' `$6' -represents the average number of parcels shipped for a particular -month. - -Creating a new field changes what `awk' interprets as the current -input record. The value of `$0' will be recomputed. This -recomputation affects and is affected by features not yet discussed, -in particular, the "Output Field Separator", `OFS', which is used to -separate the fields (*note Output Separators::.), and `NF' (the -number of fields; *note Fields::.). For example, the value of `NF' -will be set to the number of the highest out--of--range field you -create.
- -Note, however, that merely *referencing* an out--of--range field will -*not* change the value of either `$0' or `NF'. Referencing an -out--of--range field merely produces a null string. For example: - - if ($(NF+1) != "") - print "can't happen" - else - print "everything is normal" - -should print `everything is normal'. (*Note If::, for more -information about `awk''s `if-else' statements.) - - - -File: gawk-info, Node: Field Separators, Next: Multiple, Prev: Changing Fields, Up: Reading Files - -Specifying How Fields Are Separated -=================================== - -You can change the way `awk' splits a record into fields by changing -the value of the "field separator". The field separator is -represented by the special variable `FS' in an `awk' program, and can -be set by `-F' on the command line. The `awk' language scans each -input line for the field separator character to determine the -positions of fields within that line. Shell programmers take note! -`awk' uses the variable `FS', not `IFS'. - -The default value of the field separator is a string containing a -single space. This value is actually a special case; as you know, by -default, fields are separated by whitespace sequences, not by single -spaces: two spaces in a row do not delimit an empty field. -``Whitespace'' is defined as sequences of one or more spaces or tab -characters. - -You change the value of `FS' by "assigning" it a new value. You can -do this using the special `BEGIN' pattern (*note BEGIN/END::.). This -pattern allows you to change the value of `FS' before any input is -read. The new value of `FS' is enclosed in quotations. For example, -set the value of `FS' to the string `","': - - awk 'BEGIN { FS = "," } ; { print $2 }' - -and use the input line: - - John Q. Smith, 29 Oak St., Walamazoo, MI 42139 - -This `awk' program will extract the string `29 Oak St.'. - -Sometimes your input data will contain separator characters that -don't separate fields the way you thought they would. 
For instance, -the person's name in the example we've been using might have a title -or suffix attached, such as `John Q. Smith, LXIX'. If you assigned -`FS' to be `,' then: - - awk 'BEGIN { FS = "," } ; { print $2 }' - -would extract `LXIX', instead of `29 Oak St.'. If you were expecting -the program to print the address, you would be surprised. So, choose -your data layout and separator characters carefully to prevent -problems like this from happening. - -You can assign `FS' to be a series of characters. For example, the -assignment: - - FS = ", \t" - -makes every area of an input line that consists of a comma followed -by a space and a tab, into a field separator. (`\t' stands for a tab.) - -If `FS' is any single character other than a blank, then that -character is used as the field separator, and two successive -occurrences of that character do delimit an empty field. - -If you assign `FS' to a string longer than one character, that string -is evaluated as a "regular expression" (*note Regexp::.). The value -of the regular expression is used as a field separator. - -`FS' can be set on the command line. You use the `-F' argument to do -so. For example: - - awk -F, 'PROGRAM' INPUT-FILES - -sets `FS' to be the `,' character. Notice that the argument uses a -capital `F'. Contrast this with `-f', which specifies a file -containing an `awk' program. Case is significant in command options: -the `-F' and `-f' options have nothing to do with each other. You -can use both options at the same time to set the `FS' argument *and* -get an `awk' program from a file. - -As a special case, if the argument to `-F' is `t', then `FS' is set -to the tab character. (This is because if you type `-F\t', without -the quotes, at the shell, the `\' gets deleted, so `awk' figures that -you really want your fields to be separated with tabs, and not `t's. -Use `FS="t"' if you really do want to separate your fields with `t's.)
- -For example, let's use an `awk' program file called `baud.awk' that -contains the pattern `/300/', and the action `print $1'. We'll use -the operating system utility `cat' to ``look'' at our program: - - % cat baud.awk - /300/ { print $1 } - -Let's also set `FS' to be the `-' character. We will apply all this -information to the file `BBS-list'. This `awk' program will now -print a list of the names of the bulletin boards that operate at 300 -baud and the first three digits of their phone numbers. - - awk -F- -f baud.awk BBS-list - -produces this output: - - aardvark 555 - alpo - barfly 555 - bites 555 - camelot 555 - core 555 - fooey 555 - foot 555 - macfoo 555 - sdace 555 - sabafoo 555 - -Note the second line of output. If you check the original file, you -will see that the second line looked like this: - - alpo-net 555-3412 2400/1200/300 A - -The `-' as part of the system's name was used as the field separator, -instead of the `-' in the phone number that was originally intended. -This demonstrates why you have to be careful in choosing your field -and record separators. - - - -File: gawk-info, Node: Multiple, Next: Assignment Options, Prev: Field Separators, Up: Reading Files - -Multiple--Line Records -====================== - -In some data bases, a single line cannot conveniently hold all the -information in one entry. Then you will want to use multi--line -records. - -The first step in doing this is to choose your data format: when -records are not defined as single lines, how will you want to define -them? What should separate records? - -One technique is to use an unusual character or string to separate -records. For example, you could use the formfeed character (written -`\f' in `awk', as in C) to separate them, making each record a page -of the file. To do this, just set the variable `RS' to `"\f"' (a -string containing the formfeed character), or whatever string you -prefer to use. - -Another technique is to have blank lines separate records. 
By a -special dispensation, a null string as the value of `RS' indicates -that records are separated by one or more blank lines. If you set -`RS' to the null string, a record will always end at the first blank -line encountered. And the next record won't start until the first -nonblank line that follows--no matter how many blank lines appear in -a row, they will be considered one record--separator. - -The second step is to separate the fields in the record. One way to -do this is to put each field on a separate line: to do this, just set -the variable `FS' to the string `"\n"'. (This simple regular -expression matches a single newline.) Another idea is to divide each -of the lines into fields in the normal manner; the regular expression -`"[ \t\n]+"' will do this nicely by treating the newlines inside the -record just like spaces. - -When `RS' is set to the null string, the newline character *always* -acts as a field separator. This is in addition to whatever value -`FS' has. The probable reason for this rule is so that you get -rational behavior in the default case (i.e. `FS == " "'). This can -be a problem if you really don't want the newline character to -separate fields, since there is no way to do that. However, you can -work around this by using the `split' function to manually break up -your data (*note String Functions::.). - -Here is how to use records separated by blank lines and break each -line into fields normally: - - awk 'BEGIN { RS = ""; FS = "[ \t\n]+" } ; { print $0 }' BBS-list - - - -File: gawk-info, Node: Assignment Options, Next: Getline, Prev: Multiple, Up: Reading Files - -Assigning Variables on the Command Line -======================================= - -You can include variable "assignments" among the file names on the -command line used to invoke `awk' (*note Command Line::.). Such -assignments have the form: - - VARIABLE=TEXT - -and allow you to change variables either at the beginning of the -`awk' run or in between input files. 
The variable assignment is -performed at a time determined by its position among the input file -arguments: after the processing of the preceding input file argument. -For example: - - awk '{ print $n }' n=4 inventory-shipped n=2 BBS-list - -prints the value of field number `n' for all input records. Before -the first file is read, the command line sets the variable `n' equal -to 4. This causes the fourth field of the file `inventory-shipped' -to be printed. After the first file has finished, but before the -second file is started, `n' is set to 2, so that the second field of -the file `BBS-list' will be printed. - -Command line arguments are made available for explicit examination by -the `awk' program in an array named `ARGV' (*note Special::.). - - - -File: gawk-info, Node: Getline, Prev: Assignment Options, Up: Reading Files - -Explicit Input with `getline' -============================= - -So far we have been getting our input files from `awk''s main input -stream--either the standard input (usually your terminal) or the -files specified on the command line. The `awk' language has a -special built--in function called `getline' that can be used to read -input under your explicit control. - -This command is quite complex and should *not* be used by beginners. -The command (and its variations) is covered here because this is the -section about input. The examples that follow the explanation of the -`getline' command include material that has not been covered yet. -Therefore, come back and attempt the `getline' command *after* you -have reviewed the rest of this manual and have a good knowledge of -how `awk' works. - -When retrieving input, `getline' returns a 1 if it found a record, -and a 0 if the end of the file was encountered. If there was some -error in getting a record, such as a file that could not be opened, -then `getline' returns a -1. - -In the following examples, COMMAND stands for a string value that -represents a shell command. 
- -`getline' - The `getline' function can be used by itself, in an `awk' - program, to read input from the current input. All it does in - this case is read the next input record and split it up into - fields. This is useful if you've finished processing the - current record, but you want to do some special processing - *right now* on the next record. Here's an example: - - awk '{ - if (t = index($0, "/*")) { - if(t > 1) - tmp = substr($0, 1, t - 1) - else - tmp = "" - u = index(substr($0, t + 2), "*/") - while (! u) { - getline - t = -1 - u = index($0, "*/") - } - if(u <= length($0) - 2) - $0 = tmp substr($0, t + u + 3) - else - $0 = tmp - } - print $0 - }' - - This `awk' program deletes all comments, `/* ... */', from the - input. By replacing the `print $0' with other statements, you - could perform more complicated processing on the de--commented - input, such as search it for matches for a regular expression. - - This form of the `getline' command sets `NF' (the number of - fields; *note Fields::.), `NR' (the number of records read so - far), the `FNR' variable (*note Records::.), and the value of - `$0'. - - *Note:* The new value of `$0' will be used in testing the - patterns of any subsequent rules. The original value of `$0' - that triggered the rule which executed `getline' is lost. By - contrast, the `next' statement reads a new record but - immediately begins processing it normally, starting with the - first rule in the program. *Note Next::. - -`getline VAR' - This form of `getline' reads a record into the variable VAR. - This is useful when you want your program to read the next - record from the input file, but you don't want to subject the - record to the normal input processing. - - For example, suppose the next line is a comment, or a special - string, and you want to read it, but you must make certain that - it won't accidentally trigger any rules. 
This version of - `getline' will allow you to read that line and store it in a - variable so that the main read--a--line--and--check--each--rule - loop of `awk' never sees it. - - The following example swaps every two lines of input. For - example, given: - - wan - tew - free - phore - - it outputs: - - tew - wan - phore - free - - Here's the program: - - awk '{ - if ((getline tmp) > 0) { - print tmp - print $0 - } else - print $0 - }' - - The `getline' function used in this way sets only `NR' and `FNR' - (and of course, VAR). The record is not split into fields, so - the values of the fields (including `$0') and the value of `NF' - do not change. - -`getline < FILE' - This form of the `getline' function takes its input from the - file FILE. Here FILE is a string--valued expression that - specifies the file name. - - This form is useful if you want to read your input from a - particular file, instead of from the main input stream. For - example, the following program reads its input record from the - file `foo.input' when it encounters a first field with a value - equal to 10 in the current input file. - - awk '{ - if ($1 == 10) { - getline < "foo.input" - print - } else - print - }' - - Since the main input stream is not used, the values of `NR' and - `FNR' are not changed. But the record read is split into fields - in the normal manner, so the values of `$0' and other fields are - changed. So is the value of `NF'. - - This does not cause the record to be tested against all the - patterns in the `awk' program, in the way that would happen if - the record were read normally by the main processing loop of - `awk'. However the new record is tested against any subsequent - rules, just as when `getline' is used without a redirection. - -`getline VAR < FILE' - This form of the `getline' function takes its input from the - file FILE and puts it in the variable VAR. As above, FILE is a - string--valued expression that specifies the file to read from. 
- - In this version of `getline', none of the built--in variables - are changed, and the record is not split into fields. The only - variable changed is VAR. - - For example, the following program copies all the input files to - the output, except for records that say `@include FILENAME'. - Such a record is replaced by the contents of the file FILENAME. - - awk '{ - if (NF == 2 && $1 == "@include") { - while ((getline line < $2) > 0) - print line - close($2) - } else - print - }' - - Note here how the name of the extra input file is not built into - the program; it is taken from the data, from the second field on - the `@include' line. - - The `close' command is used to ensure that if two identical - `@include' lines appear in the input, the entire specified file - is included twice. *Note Close Input::. - - One deficiency of this program is that it does not process - nested `@include' statements the way a true macro preprocessor - would. - -`COMMAND | getline' - You can "pipe" the output of a command into `getline'. A pipe - is simply a way to link the output of one program to the input - of another. In this case, the string COMMAND is run as a shell - command and its output is piped into `awk' to be used as input. - This form of `getline' reads one record from the pipe. - - For example, the following program copies input to output, - except for lines that begin with `@execute', which are replaced - by the output produced by running the rest of the line as a - shell command: - - awk '{ - if ($1 == "@execute") { - tmp = substr($0, 10) - while ((tmp | getline) > 0) - print - close(tmp) - } else - print - }' - - The `close' command is used to ensure that if two identical - `@execute' lines appear in the input, the command is run again - for each one. *Note Close Input::. 
- - Given the input: - - foo - bar - baz - @execute who - bletch - - the program might produce: - - foo - bar - baz - hack ttyv0 Jul 13 14:22 - hack ttyp0 Jul 13 14:23 (gnu:0) - hack ttyp1 Jul 13 14:23 (gnu:0) - hack ttyp2 Jul 13 14:23 (gnu:0) - hack ttyp3 Jul 13 14:23 (gnu:0) - bletch - - Notice that this program ran the command `who' and printed the - result. (If you try this program yourself, you will get - different results, showing you logged in.) - - This variation of `getline' splits the record into fields, sets - the value of `NF' and recomputes the value of `$0'. The values - of `NR' and `FNR' are not changed. - -`COMMAND | getline VAR' - The output of the command COMMAND is sent through a pipe to - `getline' and into the variable VAR. For example, the following - program reads the current date and time into the variable - `current_time', using the utility called `date', and then prints - it. - - awk 'BEGIN { - "date" | getline current_time - close("date") - print "Report printed on " current_time - }' - - In this version of `getline', none of the built--in variables - are changed, and the record is not split into fields. - - - -File: gawk-info, Node: Close Input, Up: Getline - -Closing Input Files -------------------- - -If the same file name or the same shell command is used with -`getline' more than once during the execution of the `awk' program, -the file is opened (or the command is executed) only the first time. -At that time, the first record of input is read from that file or -command. The next time the same file or command is used in -`getline', another record is read from it, and so on. - -What this implies is that if you want to start reading the same file -again from the beginning, or if you want to rerun a shell command -(rather than reading more output from the command), you must take -special steps. What you can do is use the `close' statement: - - close (FILENAME) - -This statement closes a file or pipe, represented here by FILENAME. 
-The string value of FILENAME must be the same value as the string -used to open the file or pipe to begin with. - -Once this statement is executed, the next `getline' from that file or -command will reopen the file or rerun the command. - - - -File: gawk-info, Node: Printing, Next: One-liners, Prev: Reading Files, Up: Top - -Printing Output -*************** - -One of the most common things that actions do is to output or "print" -some or all of the input. For simple output, use the `print' -statement. For fancier formatting use the `printf' statement. Both -are described in this chapter. - -* Menu: - -* Print:: The `print' statement. -* Print Examples:: Simple examples of `print' statements. -* Output Separators:: The output separators and how to change them. - -* Redirection:: How to redirect output to multiple files and pipes. -* Close Output:: How to close output files and pipes. - -* Printf:: The `printf' statement. - - - -File: gawk-info, Node: Print, Next: Print Examples, Up: Printing - -The `print' Statement -===================== - -The `print' statement does output with simple, standardized -formatting. You specify only the strings or numbers to be printed, -in a list separated by commas. They are output, separated by single -spaces, followed by a newline. The statement looks like this: - - print ITEM1, ITEM2, ... - - The entire list of items may optionally be enclosed in parentheses. -The parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a -redirection (*note Redirection::.). The relational operators are -`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison -Ops::.). - -The items printed can be constant strings or numbers, fields of the -current record (such as `$1'), variables, or any `awk' expressions. -The `print' statement is completely general for computing *what* -values to print. 
With one exception (*note Output Separators::.), -what you can't do is specify *how* to print them--how many columns to -use, whether to use exponential notation or not, and so on. For -that, you need the `printf' statement (*note Printf::.). - -To print a fixed piece of text, write a string constant as one item, -such as `"Hello there"'. If you forget to use the double--quote -characters, your text will be taken as an `awk' expression, and you -will probably get an error. Keep in mind that a space will be -printed between any two items. - -The simple statement `print' with no items is equivalent to `print -$0': it prints the entire current record. To print a blank line, use -`print ""', where `""' is the null, or empty, string. - -Most often, each `print' statement makes one line of output. But it -isn't limited to one line. If an item value is a string that -contains a newline, the newline is output along with the rest of the -string. A single `print' can make any number of lines this way. - - - -File: gawk-info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing - -Examples of `print' Statements -============================== - -Here is an example that prints the first two fields of each input -record, with a space between them: - - awk '{ print $1, $2 }' inventory-shipped - -Its output looks like this: - - Jan 13 - Feb 15 - Mar 15 - ... - - A common mistake in using the `print' statement is to omit the comma -between two items. This often has the effect of making the items run -together in the output, with no space. The reason for this is that -juxtaposing two string expressions in `awk' means to concatenate -them. For example, without the comma: - - awk '{ print $1 $2 }' inventory-shipped - -prints: - - Jan13 - Feb15 - Mar15 - ... - - Neither example's output makes much sense to someone unfamiliar with -the file `inventory-shipped'. A heading line at the beginning would -make it clearer. 
Let's add some headings to our table of months -(`$1') and green crates shipped (`$2'). We do this using the BEGIN -pattern (*note BEGIN/END::.) to cause the headings to be printed only -once: - - awk 'BEGIN { print "Month Crates" - print "---- -----" } - { print $1, $2 }' inventory-shipped - -Did you already guess what will happen? This program prints the -following: - - Month Crates - ---- ----- - Jan 13 - Feb 15 - Mar 15 - ... - - The headings and the table data don't line up! We can fix this by -printing some spaces between the two fields: - - awk 'BEGIN { print "Month Crates" - print "---- -----" } - { print $1, " ", $2 }' inventory-shipped - -You can imagine that this way of lining up columns can get pretty -complicated when you have many columns to fix. Counting spaces for -two or three columns can be simple, but more than this and you can -get ``lost'' quite easily. This is why the `printf' statement was -created (*note Printf::.); one of its specialties is lining up -columns of data. - - - -File: gawk-info, Node: Output Separators, Next: Redirection, Prev: Print Examples, Up: Printing - -Output Separators -================= - -As mentioned previously, a `print' statement contains a list of -items, separated by commas. In the output, the items are normally -separated by single spaces. But they do not have to be spaces; a -single space is only the default. You can specify any string of -characters to use as the "output field separator", by setting the -special variable `OFS'. The initial value of this variable is the -string `" "'. - -The output from an entire `print' statement is called an "output -record". Each `print' statement outputs one output record and then -outputs a string called the "output record separator". The special -variable `ORS' specifies this string. The initial value of the -variable is the string `"\n"' containing a newline character; thus, -normally each `print' statement makes a separate line. 
- -You can change how output fields and records are separated by -assigning new values to the variables `OFS' and/or `ORS'. The usual -place to do this is in the `BEGIN' rule (*note BEGIN/END::.), so that -it happens before any input is processed. You may also do this with -assignments on the command line, before the names of your input files. - -The following example prints the first and second fields of each -input record separated by a semicolon, with a blank line added after -each line: - - awk 'BEGIN { OFS = ";"; ORS = "\n\n" } - { print $1, $2 }' BBS-list - -If the value of `ORS' does not contain a newline, all your output -will be run together on a single line, unless you output newlines -some other way. - - - -File: gawk-info, Node: Redirection, Next: Printf, Prev: Output Separators, Up: Printing - -Redirecting Output of `print' and `printf' -========================================== - -So far we have been dealing only with output that prints to the -standard output, usually your terminal. Both `print' and `printf' -can be told to send their output to other places. This is called -"redirection". - -A redirection appears after the `print' or `printf' statement. -Redirections in `awk' are written just like redirections in shell -commands, except that they are written inside the `awk' program. - -Here are the three forms of output redirection. They are all shown -for the `print' statement, but they work for `printf' also. - -`print ITEMS > OUTPUT-FILE' - This type of redirection prints the items onto the output file - OUTPUT-FILE. The file name OUTPUT-FILE can be any expression. - Its value is changed to a string and then used as a filename - (*note Expressions::.). - - When this type of redirection is used, the OUTPUT-FILE is erased - before the first output is written to it. Subsequent writes do - not erase OUTPUT-FILE, but append to it. If OUTPUT-FILE does - not exist, then it is created. 
- - For example, here is how one `awk' program can write a list of - BBS names to a file `name-list' and a list of phone numbers to a - file `phone-list'. Each output file contains one name or number - per line. - - awk '{ print $2 > "phone-list" - print $1 > "name-list" }' BBS-list - -`print ITEMS >> OUTPUT-FILE' - This type of redirection prints the items onto the output file - OUTPUT-FILE. The difference between this and the single--`>' - redirection is that the old contents (if any) of OUTPUT-FILE are - not erased. Instead, the `awk' output is appended to the file. - -`print ITEMS | COMMAND' - It is also possible to send output through a "pipe" instead of - into a file. This type of redirection opens a pipe to COMMAND - and writes the values of ITEMS through this pipe, to another - process created to execute COMMAND. - - The redirection argument COMMAND is actually an `awk' - expression. Its value is converted to a string, whose contents - give the shell command to be run. - - For example, this produces two files, one unsorted list of BBS - names and one list sorted in reverse alphabetical order: - - awk '{ print $1 > "names.unsorted" - print $1 | "sort -r > names.sorted" }' BBS-list - - Here the unsorted list is written with an ordinary redirection - while the sorted list is written by piping through the `sort' - utility. - - Here is an example that uses redirection to mail a message to a - mailing list `bug-system'. This might be useful when trouble is - encountered in an `awk' script run periodically for system - maintenance. - - print "Awk script failed:", $0 | "mail bug-system" - print "processing record number", FNR, "of", FILENAME | "mail bug-system" - close ("mail bug-system") - - We use a `close' statement here because it's a good idea to - close the pipe as soon as all the intended output has been sent - to it. *Note Close Output::, for more information on this. 
- -Redirecting output using `>', `>>', or `|' asks the system to open a -file or pipe only if the particular FILE or COMMAND you've specified -has not already been written to by your program. - - - -File: gawk-info, Node: Close Output, Up: Redirection - -Closing Output Files and Pipes ------------------------------- - -When a file or pipe is opened, the filename or command associated -with it is remembered by `awk' and subsequent writes to the same file -or command are appended to the previous writes. The file or pipe -stays open until `awk' exits. This is usually convenient. - -Sometimes there is a reason to close an output file or pipe earlier -than that. To do this, use the `close' command, as follows: - - close (FILENAME) - -or - - close (COMMAND) - -The argument FILENAME or COMMAND can be any expression. Its value -must exactly equal the string used to open the file or pipe to begin -with--for example, if you open a pipe with this: - - print $1 | "sort -r > names.sorted" - -then you must close it with this: - - close ("sort -r > names.sorted") - -Here are some reasons why you might need to close an output file: - - * To write a file and read it back later on in the same `awk' - program. Close the file when you are finished writing it; then - you can start reading it with `getline' (*note Getline::.). - - * To write numerous files, successively, in the same `awk' - program. If you don't close the files, eventually you will - exceed the system limit on the number of open files in one - process. So close each one when you are finished writing it. - - * To make a command finish. When you redirect output through a - pipe, the command reading the pipe normally continues to try to - read input as long as the pipe is open. Often this means the - command cannot really do its work until the pipe is closed. For - example, if you redirect output to the `mail' program, the - message will not actually be sent until the pipe is closed. 
- - * To run the same subprogram a second time, with the same arguments. - This is not the same thing as giving more input to the first run! - - For example, suppose you pipe output to the `mail' program. If - you output several lines redirected to this pipe without closing - it, they make a single message of several lines. By contrast, - if you close the pipe after each line of output, then each line - makes a separate message. - - - -File: gawk-info, Node: Printf, Prev: Redirection, Up: Printing - -Using `printf' Statements For Fancier Printing -============================================== - -If you want more precise control over the output format than `print' -gives you, use `printf'. With `printf' you can specify the width to -use for each item, and you can specify various stylistic choices for -numbers (such as what radix to use, whether to print an exponent, -whether to print a sign, and how many digits to print after the -decimal point). You do this by specifying a "format string". - -* Menu: - -* Basic Printf:: Syntax of the `printf' statement. -* Format-Control:: Format-control letters. -* Modifiers:: Format--specification modifiers. -* Printf Examples:: Several examples. - - - -File: gawk-info, Node: Basic Printf, Next: Format-Control, Up: Printf - -Introduction to the `printf' Statement --------------------------------------- - -The `printf' statement looks like this: - - printf FORMAT, ITEM1, ITEM2, ... - - The entire list of items may optionally be enclosed in parentheses. -The parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a -redirection (*note Redirection::.). The relational operators are -`==', `!=', `<', `>', `>=', `<=', `~' and `!~' (*note Comparison -Ops::.). - -The difference between `printf' and `print' is the argument FORMAT. -This is an expression whose value is taken as a string; its job is to -say how to output each of the other arguments. 
It is called the -"format string". - -The format string is essentially the same as in the C library -function `printf'. Most of FORMAT is text to be output verbatim. -Scattered among this text are "format specifiers", one per item. -Each format specifier says to output the next item at that place in -the format. - -The `printf' statement does not automatically append a newline to its -output. It outputs nothing but what the format specifies. So if you -want a newline, you must include one in the format. The output -separator variables `OFS' and `ORS' have no effect on `printf' -statements. - - - -File: gawk-info, Node: Format-Control, Next: Modifiers, Prev: Basic Printf, Up: Printf - -Format--Control Characters --------------------------- - -A format specifier starts with the character `%' and ends with a -"format--control letter"; it tells the `printf' statement how to -output one item. (If you actually want to output a `%', write `%%'.) -The format--control letter specifies what kind of value to print. -The rest of the format specifier is made up of optional "modifiers" -which are parameters such as the field width to use. - -Here is a list of them: - -`c' - This prints a number as an ASCII character. Thus, `printf "%c", - 65' outputs the letter `A'. The output for a string value is - the first character of the string. - -`d' - This prints a decimal integer. - -`e' - This prints a number in scientific (exponential) notation. For - example, - - printf "%4.3e", 1950 - - prints `1.950e+03', with a total of 4 significant figures of - which 3 follow the decimal point. The `4.3' are "modifiers", - discussed below. - -`f' - This prints a number in floating point notation. - -`g' - This prints either scientific notation or floating point - notation, whichever is shorter. - -`o' - This prints an unsigned octal integer. - -`s' - This prints a string. - -`x' - This prints an unsigned hexadecimal integer. 
- -`%' - This isn't really a format--control letter, but it does have a - meaning when used after a `%': the sequence `%%' outputs one - `%'. It does not consume an argument. - - - -File: gawk-info, Node: Modifiers, Next: Printf Examples, Prev: Format-Control, Up: Printf - -Modifiers for `printf' Formats ------------------------------- - -A format specification can also include "modifiers" that can control -how much of the item's value is printed and how much space it gets. -The modifiers come between the `%' and the format--control letter. -Here are the possible modifiers, in the order in which they may appear: - -`-' - The minus sign, used before the width modifier, says to - left--justify the argument within its specified width. Normally - the argument is printed right--justified in the specified width. - -`WIDTH' - This is a number representing the desired width of a field. - Inserting any number between the `%' sign and the format control - character forces the field to be expanded to this width. The - default way to do this is to pad with spaces on the left. - -`.PREC' - This is a number that specifies the precision to use when - printing. This specifies the number of digits you want printed - to the right of the decimal place. - -The C library `printf''s dynamic WIDTH and PREC capability (for -example, `"%*.*s"') is not supported. However, it can be easily -simulated using concatenation to dynamically build the format string. - - - -File: gawk-info, Node: Printf Examples, Prev: Modifiers, Up: Printf - -Examples of Using `printf' --------------------------- - -Here is how to use `printf' to make an aligned table: - - awk '{ printf "%-10s %s\n", $1, $2 }' BBS-list - -prints the names of bulletin boards (`$1') of the file `BBS-list' as -a string of 10 characters, left justified. It also prints the phone -numbers (`$2') afterward on the line. 
This will produce an aligned -two--column table of names and phone numbers, like so: - - aardvark 555-5553 - alpo-net 555-3412 - barfly 555-7685 - bites 555-1675 - camelot 555-0542 - core 555-2912 - fooey 555-1234 - foot 555-6699 - macfoo 555-6480 - sdace 555-3430 - sabafoo 555-2127 - -Did you notice that we did not specify that the phone numbers be -printed as numbers? They had to be printed as strings because the -numbers are separated by a dash. This dash would be interpreted as a -"minus" sign if we had tried to print the phone numbers as numbers. -This would have led to some pretty confusing results. - -We did not specify a width for the phone numbers because they are the -last things on their lines. We don't need to put spaces after them. - -We could make our table look even nicer by adding headings to the -tops of the columns. To do this, use the BEGIN pattern (*note -BEGIN/END::.) to cause the header to be printed only once, at the -beginning of the `awk' program: - - awk 'BEGIN { print "Name Number" - print "--- -----" } - { printf "%-10s %s\n", $1, $2 }' BBS-list - -Did you notice that we mixed `print' and `printf' statements in the -above example? We could have used just `printf' statements to get -the same results: - - awk 'BEGIN { printf "%-10s %s\n", "Name", "Number" - printf "%-10s %s\n", "---", "-----" } - { printf "%-10s %s\n", $1, $2 }' BBS-list - -By outputting each column heading with the same format specification -used for the elements of the column, we have made sure that the -headings will be aligned just like the columns. 
- -The fact that the same format specification is used can be emphasized -by storing it in a variable, like so: - - awk 'BEGIN { format = "%-10s %s\n" - printf format, "Name", "Number" - printf format, "---", "-----" } - { printf format, $1, $2 }' BBS-list - -See if you can use the `printf' statement to line up the headings and -table data for our `inventory-shipped' example covered earlier in the -section on the `print' statement (*note Print::.). - - - -File: gawk-info, Node: One-liners, Next: Patterns, Prev: Printing, Up: Top - -Useful ``One-liners'' -********************* - -Useful `awk' programs are often short, just a line or two. Here is a -collection of useful, short programs to get you started. Some of -these programs contain constructs that haven't been covered yet. The -description of the program will give you a good idea of what is going -on, but please read the rest of the manual to become an `awk' expert! - -`awk '{ num_fields = num_fields + NF }' -`` END { print num_fields }''' - This program prints the total number of fields in all input lines. - -`awk 'length($0) > 80'' - This program prints every line longer than 80 characters. The - sole rule has a relational expression as its pattern, and has no - action (so the default action, printing the record, is used). - -`awk 'NF > 0'' - This program prints every line that has at least one field. - This is an easy way to delete blank lines from a file (or - rather, to create a new file similar to the old file but from - which the blank lines have been deleted). - -`awk '{ if (NF > 0) print }'' - This program also prints every line that has at least one field. - Here we allow the rule to match every line, then decide in the - action whether to print. - -`awk 'BEGIN { for (i = 1; i <= 7; i++)' -`` print int(101 * rand()) }''' - This program prints 7 random numbers from 0 to 100, inclusive. 
- -`ls -l FILES | awk '{ x += $4 } ; END { print "total bytes: " x }'' - This program prints the total number of bytes used by FILES. - -`expand FILE | awk '{ if (x < length()) x = length() }' -`` END { print "maximum line length is " x }''' - This program prints the maximum line length of FILE. The input - is piped through the `expand' program to change tabs into - spaces, so the widths compared are actually the right--margin - columns. - - diff --git a/gawk-info-3 b/gawk-info-3 deleted file mode 100644 index b333f57c..00000000 --- a/gawk-info-3 +++ /dev/null @@ -1,1385 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: Patterns, Next: Actions, Prev: One-liners, Up: Top - -Patterns -******** - -Patterns control the execution of rules: a rule is executed when its -pattern matches the input record. The `awk' language provides -several special patterns that are described in the sections that -follow. Patterns include: - -NULL - The empty pattern, which matches every input record. (*Note The - Empty Pattern: Empty.) 
- -/REGULAR EXPRESSION/ - A regular expression as a pattern. It matches when the text of - the input record fits the regular expression. (*Note Regular - Expressions as Patterns: Regexp.) - -CONDEXP - A single comparison expression. It matches when it is true. - (*Note Comparison Expressions as Patterns: Comparison Patterns.) - -`BEGIN' -`END' - Special patterns to supply start--up or clean--up information to - `awk'. (*Note Specifying Initialization and Cleanup Rules: BEGIN/END.) - -PAT1, PAT2 - A pair of patterns separated by a comma, specifying a range of - records. (*Note Specifying Record Ranges With Patterns: Ranges.) - -CONDEXP1 BOOLEAN CONDEXP2 - A "compound" pattern, which combines expressions with the - operators `and', `&&', and `or', `||'. (*Note Boolean - Operators and Patterns: Boolean.) - -! CONDEXP - The pattern CONDEXP is evaluated. Then the `!' performs a - boolean ``not'' or logical negation operation; if the input line - matches the pattern in CONDEXP then the associated action is - *not* executed. If the input line did not match that pattern, - then the action *is* executed. (*Note Boolean Operators and - Patterns: Boolean.) - -(EXPR) - Parentheses may be used to control how operators nest. - -PAT1 ? PAT2 : PAT3 - The first pattern is evaluated. If it is true, the input line - is tested against the second pattern, otherwise it is tested - against the third. (*Note Conditional Patterns: Conditional - Patterns.) - -* Menu: - -The following subsections describe these forms in detail: - -* Empty:: The empty pattern, which matches every record. - -* Regexp:: Regular expressions such as `/foo/'. - -* Comparison Patterns:: Comparison expressions such as `$1 > 10'. - -* Boolean:: Combining comparison expressions. - -* Ranges:: Using pairs of patterns to specify record ranges. - -* BEGIN/END:: Specifying initialization and cleanup rules. - -* Conditional Patterns:: Patterns such as `pat1 ? pat2 : pat3'. 
- - - -File: gawk-info, Node: Empty, Next: Regexp, Up: Patterns - -The Empty Pattern -================= - -An empty pattern is considered to match *every* input record. For -example, the program: - - awk '{ print $1 }' BBS-list - -prints just the first field of every record. - - - -File: gawk-info, Node: Regexp, Next: Comparison Patterns, Prev: Empty, Up: Patterns - -Regular Expressions as Patterns -=============================== - -A "regular expression", or "regexp", is a way of describing classes -of strings. When enclosed in slashes (`/'), it makes an `awk' -pattern that matches every input record that contains a match for the -regexp. - -The simplest regular expression is a sequence of letters, numbers, or -both. Such a regexp matches any string that contains that sequence. -Thus, the regexp `foo' matches any string containing `foo'. (More -complicated regexps let you specify classes of similar strings.) - -* Menu: - -* Usage: Regexp Usage. How regexps are used in patterns. -* Operators: Regexp Operators. How to write a regexp. - - - -File: gawk-info, Node: Regexp Usage, Next: Regexp Operators, Up: Regexp - -How to use Regular Expressions ------------------------------- - -When you enclose `foo' in slashes, you get a pattern that matches a -record that contains `foo'. For example, this prints the second -field of each record that contains `foo' anywhere: - - awk '/foo/ { print $2 }' BBS-list - -Regular expressions can also be used in comparison expressions. Then -you can specify the string to match against; it need not be the -entire current input record. These comparison expressions can be -used as patterns or in `if' and `while' statements. - -`EXP ~ /REGEXP/' - This is true if the expression EXP (taken as a character string) - is matched by REGEXP. 
The following example matches, or - selects, all input records with the letter `J' in the first field: - - awk '$1 ~ /J/' inventory-shipped - - So does this: - - awk '{ if ($1 ~ /J/) print }' inventory-shipped - -`EXP !~ /REGEXP/' - This is true if the expression EXP (taken as a character string) - is *not* matched by REGEXP. The following example matches, or - selects, all input records whose first field *does not* contain - the letter `J': - - awk '$1 !~ /J/' inventory-shipped - -The right hand side of a `~' or `!~' operator need not be a constant -regexp (i.e. a string of characters between `/'s). It can also be -"computed", or "dynamic". For example: - - identifier = "[A-Za-z_][A-Za-z_0-9]+" - $0 ~ identifier - -sets `identifier' to a regexp that describes `awk' variable names, -and tests if the input record matches this regexp. - -A dynamic regexp may actually be any expression. The expression is -evaluated, and the result is treated as a string that describes a -regular expression. - - - -File: gawk-info, Node: Regexp Operators, Prev: Regexp Usage, Up: Regexp - -Regular Expression Operators ----------------------------- - -You can combine regular expressions with the following characters, -called "regular expression operators", or "metacharacters", to -increase the power and versatility of regular expressions. This is a -table of metacharacters: - -`\' - This is used to suppress the special meaning of a character when - matching. For example: - - \$ - - matches the character `$'. - -`^' - This matches the beginning of the string or the beginning of a - line within the string. For example: - - ^@chapter - - matches the `@chapter' at the beginning of a string, and can be - used to identify chapter beginnings in Texinfo source files. - -`$' - This is similar to `^', but it matches only at the end of a - string or the end of a line within the string. For example: - - /p$/ - - as a pattern matches a record that ends with a `p'. - -`.' 
- This matches any single character except a newline. For example: - - .P - - matches any single character followed by a `P' in a string. - Using concatenation we can make regular expressions like `U.A', - which matches any three--character string that begins with `U' - and ends with `A'. - -`[...]' - This is called a "character set". It matches any one of a group - of characters that are enclosed in the square brackets. For - example: - - [MVX] - - matches any of the characters `M', `V', or `X' in a string. - - Ranges of characters are indicated by using a hyphen between the - beginning and ending characters, and enclosing the whole thing - in brackets. For example: - - [0-9] - - matches any string that contains a digit. - - Note that special patterns have to be followed to match the - characters, `]', `-', and `^' when they are enclosed in the - square brackets. To match a `]', make it the first character in - the set. For example: - - []d] - - matches either `]', or `d'. - - To match `-', write it as `---', which is a range containing only - `-'. You may also make the `-' be the first or last character - in the set. To match `^', make it any character except the - first one of a set. - -`[^ ...]' - This is the "complemented character set". The first character - after the `[' *must* be a `^'. This matches any characters - *except* those in the square brackets. For example: - - [^0-9] - - matches any characters that are not digits. - -`|' - This is the "alternation operator" and it is used to specify - alternatives. For example: - - ^P|[0-9] - - matches any string that matches either `^P' or `[0-9]'. This - means it matches any string that contains a digit or starts with - `P'. - -`(...)' - Parentheses are used for grouping in regular expressions as in - arithmetic. They can be used to concatenate regular expressions - containing the alternation operator, `|'. 
- -`*' - This symbol means that the preceding regular expression is to be - repeated as many times as possible to find a match. For example: - - ph* - - applies the `*' symbol to the preceding `h' and looks for - matches to one `p' followed by any number of `h''s. This will - also match just `p' if no `h''s are present. - - The `*' means repeat the *smallest* possible preceding - expression in order to find a match. The `awk' language - processes a `*' by matching as many repetitions as can be found. - For example: - - awk '/\(c[ad][ad]*r x\)/ { print }' sample - - matches every record in the input containing a string of the - form `(car x)', `(cdr x)', `(cadr x)', and so on. - -`+' - This symbol is similar to `*', but the preceding expression must - be matched at least once. This means that: - - wh+y - - would match `why' and `whhy' but not `wy', whereas `wh*y' would - match all three of these strings. And this is a simpler way of - writing the last `*' example: - - awk '/\(c[ad]+r x\)/ { print }' sample - -`?' - This symbol is similar to `*', but the preceding expression can - be matched once or not at all. For example: - - fe?d - - will match `fed' or `fd', but nothing else. - -In regular expressions, the `*', `+', and `?' operators have the -highest precedence, followed by concatenation, and finally by `|'. -As in arithmetic, parentheses can change how operators are grouped. - -Any other character stands for itself. However, it is important to -note that case in regular expressions *is* significant, both when -matching ordinary (i.e. non--metacharacter) characters, and inside -character sets. Thus a `w' in a regular expression matches only a -lowercase `w'; it does not match an uppercase `W'. When -you want to do a case--independent match, you have to use a character -set: `[Ww]'. 
- - - -File: gawk-info, Node: Comparison Patterns, Next: Ranges, Prev: Regexp, Up: Patterns - -Comparison Expressions as Patterns -================================== - -"Comparison patterns" use "relational operators" to compare strings -or numbers. The relational operators are the same as in C. Here is -a table of them: - -`X < Y' - True if X is less than Y. - -`X <= Y' - True if X is less than or equal to Y. - -`X > Y' - True if X is greater than Y. - -`X >= Y' - True if X is greater than or equal to Y. - -`X == Y' - True if X is equal to Y. - -`X != Y' - True if X is not equal to Y. - -Comparison expressions can be used as patterns to control whether a -rule is executed. The expression is evaluated for each input record -read, and the pattern is considered matched if the condition is "true". - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (*note Conversion::.). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, `"10"' is less than `"9"'. - -The following example prints the second field of each input record -whose first field is precisely `foo'. - - awk '$1 == "foo" { print $2 }' BBS-list - -Contrast this with the following regular expression match, which -would accept any record with a first field that contains `foo': - - awk '$1 ~ "foo" { print $2 }' BBS-list - - - -File: gawk-info, Node: Ranges, Next: BEGIN/END, Prev: Comparison Patterns, Up: Patterns - -Specifying Record Ranges With Patterns -====================================== - -A "range pattern" is made of two patterns separated by a comma: -`BEGPAT, ENDPAT'. It matches ranges of consecutive input records. -The first pattern BEGPAT controls where the range begins, and the -second one ENDPAT controls where it ends. 
- -They work as follows: BEGPAT is matched against every input record; -when a record matches BEGPAT, the range pattern becomes "turned on". -The range pattern matches this record. As long as it stays turned -on, it automatically matches every input record read. But meanwhile, -ENDPAT is matched against every input record, and when it matches, -the range pattern is turned off again for the following record. Now -we go back to checking BEGPAT against each record. For example: - - awk '$1 == "on", $1 == "off"' - -prints every record between on/off pairs, inclusive. - -The record that turns on the range pattern and the one that turns it -off both match the range pattern. If you don't want to operate on -these records, you can write `if' statements in the rule's action to -distinguish them. - -It is possible for a pattern to be turned both on and off by the same -record, if both conditions are satisfied by that record. Then the -action is executed for just that record. - - - -File: gawk-info, Node: BEGIN/END, Next: Boolean, Prev: Ranges, Up: Patterns - -`BEGIN' and `END' Special Patterns -================================== - -`BEGIN' and `END' are special patterns. They are not used to match -input records. Rather, they are used for supplying start--up or -clean--up information to your `awk' script. A `BEGIN' rule is -executed, once, before the first input record has been read. An -`END' rule is executed, once, after all the input has been read. For -example: - - awk 'BEGIN { print "Analysis of ``foo'' program" } - /foo/ { ++foobar } - END { print "``foo'' appears " foobar " times." }' BBS-list - -This program finds out how many times the string `foo' appears in the -input file `BBS-list'. The `BEGIN' pattern prints out a title for -the report. There is no need to use the `BEGIN' pattern to -initialize the counter `foobar' to zero, as `awk' does this for us -automatically (*note Variables::.). 
The second rule increments the -variable `foobar' every time a record containing the pattern `foo' is -read. The last rule prints out the value of `foobar' at the end of -the run. - -The special patterns `BEGIN' and `END' do not combine with other -kinds of patterns. - -An `awk' program may have multiple `BEGIN' and/or `END' rules. The -contents of multiple `BEGIN' or `END' rules are treated as if they -had been enclosed in a single rule, in the order that the rules are -encountered in the `awk' program. (This feature was introduced with -the new version of `awk'.) - -Multiple `BEGIN' and `END' sections are also useful for writing -library functions that need to do initialization and/or cleanup of -their own. Note that the order in which library functions are named -on the command line will affect the order in which their `BEGIN' and -`END' rules will be executed. Therefore you have to be careful how -you write your library functions. (*Note Command Line::, for more -information on using library functions.) - -If an `awk' program only has a `BEGIN' rule, and no other rules, then -the program will exit after the `BEGIN' rule has been run. Older -versions of `awk' used to read their input until end of file was -seen. However, if an `END' rule exists as well, then the input will -be read, even if there are no other rules in the program. - -`BEGIN' and `END' rules must have actions; there is no default action -for these rules since there is no current record when they run. - - - -File: gawk-info, Node: Boolean, Next: Conditional Patterns, Prev: BEGIN/END, Up: Patterns - -Boolean Operators and Patterns -============================== - -A boolean pattern is a combination of other patterns using the -boolean operators ``or'' (`||'), ``and'' (`&&'), and ``not'' (`!'), -along with parentheses to control nesting. Whether the boolean -pattern matches an input record is computed from whether its -subpatterns match. 
- -The subpatterns of a boolean pattern can be regular expressions, -matching expressions, comparisons, or other boolean combinations of -such. Range patterns cannot appear inside boolean operators, since -they don't make sense for classifying a single record, and neither -can the special patterns `BEGIN' and `END', which never match any -input record. - -Here are descriptions of the three boolean operators. - -`PAT1 && PAT2' - Matches if both PAT1 and PAT2 match by themselves. For example, - the following command prints all records in the input file - `BBS-list' that contain both `2400' and `foo'. - - awk '/2400/ && /foo/' BBS-list - - Whether PAT2 matches is tested only if PAT1 succeeds. This can - make a difference when PAT2 contains expressions that have side - effects: in the case of `/foo/ && ($2 == bar++)', the variable - `bar' is not incremented if there is no `foo' in the record. - -`PAT1 || PAT2' - Matches if at least one of PAT1 and PAT2 matches the current - input record. For example, the following command prints all - records in the input file `BBS-list' that contain *either* - `2400' or `foo', or both. - - awk '/2400/ || /foo/' BBS-list - - Whether PAT2 matches is tested only if PAT1 fails to match. - This can make a difference when PAT2 contains expressions that - have side effects. - -`!PAT' - Matches if PAT does not match. For example, the following - command prints all records in the input file `BBS-list' that do - *not* contain the string `foo'. - - awk '! /foo/' BBS-list - -Note that boolean patterns are built from other patterns just as -boolean expressions are built from other expressions (*note Boolean -Ops::.). Any boolean expression is also a valid boolean pattern. -But the converse is not true: simple regular expression patterns such -as `/foo/' are not allowed in boolean expressions. Regular -expressions can appear in boolean expressions only in conjunction -with the matching operators, `~' and `!~'. 
- - - -File: gawk-info, Node: Conditional Patterns, Prev: Boolean, Up: Patterns - -Conditional Patterns -==================== - -Patterns may use a "conditional expression" much like the conditional -expression of the C language. This takes the form: - - PAT1 ? PAT2 : PAT3 - -The first pattern is evaluated. If it evaluates to TRUE, then the -input record is tested against PAT2. Otherwise it is tested against -PAT3. The conditional pattern matches if PAT2 or PAT3 (whichever one -is selected) matches. - - - -File: gawk-info, Node: Actions, Next: Expressions, Prev: Patterns, Up: Top - -Actions: The Basics -******************* - -The "action" part of an `awk' rule tells `awk' what to do once a -match for the pattern is found. An action consists of one or more -`awk' "statements", enclosed in curly braces (`{' and `}'). The -curly braces must be used even if the action contains only one -statement, or even if it contains no statements at all. Action -statements are separated by newlines or semicolons. - -Besides the print statements already covered (*note Printing::.), -there are four kinds of action statements: expressions, control -statements, compound statements, and function definitions. - - * "Expressions" include assignments, arithmetic, function calls, - and more (*note Expressions::.). - - * "Control statements" specify the control flow of `awk' programs. - The `awk' language gives you C--like constructs (`if', `for', - `while', and so on) as well as a few special ones (*note - Statements::.). - - * A "compound statement" is just one or more `awk' statements - enclosed in curly braces. This way you can group several - statements to form the body of an `if' or similar statement. - - * You can define "user--defined functions" for use elsewhere in - the `awk' program (*note User-defined::.). 
- - - -File: gawk-info, Node: Expressions, Next: Statements, Prev: Actions, Up: Top - -Actions: Expressions -******************** - -Expressions are the basic building block of `awk' actions. An -expression evaluates to a value, which you can print, test, store in -a variable or pass to a function. - -But, beyond that, an expression can assign a new value to a variable -or a field, with an assignment operator. - -An expression can serve as a statement on its own. Most other action -statements are made up of various combinations of expressions. As in -other languages, expressions in `awk' include variables, array -references, constants, and function calls, as well as combinations of -these with various operators. - -* Menu: - -* Constants:: String and numeric constants. -* Variables:: Variables give names to values for future use. -* Fields:: Field references such as `$1' are also expressions. -* Arrays:: Array element references are expressions. - -* Arithmetic Ops:: Arithmetic operations (`+', `-', etc.) -* Concatenation:: Concatenating strings. -* Comparison Ops:: Comparison of numbers and strings with `<', etc. -* Boolean Ops:: Combining comparison expressions using boolean operators - `||' (``or''), `&&' (``and'') and `!' (``not''). - -* Assignment Ops:: Changing the value of a variable or a field. -* Increment Ops:: Incrementing the numeric value of a variable. - -* Conversion:: The conversion of strings to numbers and vice versa. -* Conditional Exp:: Conditional expressions select between two subexpressions - under control of a third subexpression. -* Function Calls:: A function call is an expression. - - - -File: gawk-info, Node: Constants, Next: Variables, Up: Expressions - -Constant Expressions -==================== - -There are two types of constants: numeric constants and string -constants. - -The "numeric constant" is a number. This number can be an integer, a -decimal fraction, or a number in scientific (exponential) notation. 
-Note that all numeric values are represented within `awk' in -double--precision floating point. Here are some examples of numeric -constants, which all have the same value: - - 105 - 1.05e+2 - 1050e-1 - -A string constant consists of a sequence of characters enclosed in -double--quote marks. For example: - - "parrot" - -represents the string constant `parrot'. Strings in `gawk' can be of -any length and they can contain all the possible 8--bit ASCII -characters including ASCII NUL. Other `awk' implementations may have -difficulty with some character codes. - -Some characters cannot be included literally in a string. You -represent them instead with "escape sequences", which are character -sequences beginning with a backslash (`\'). - -One use of the backslash is to include double--quote characters in a -string. Since a plain double--quote would end the string, you must -use `\"'. Backslash itself is another character that can't be -included normally; you write `\\' to put one backslash in the string. - -Another use of backslash is to represent unprintable characters such -as newline. While there is nothing to stop you from writing these -characters directly in an `awk' program, they may look ugly. - -`\b' - Represents a backspace, `^H'. - -`\f' - Represents a formfeed, `^L'. - -`\n' - Represents a newline, `^J'. - -`\r' - Represents a carriage return, `^M'. - -`\t' - Represents a horizontal tab, `^I'. - -`\v' - Represents a vertical tab, `^K'. - -`\NNN' - Represents the octal value NNN, where NNN is one to three digits - between 0 and 7. For example, the code for the ASCII ESC - (escape) character is `\033'. - - - -File: gawk-info, Node: Variables, Next: Arithmetic Ops, Prev: Constants, Up: Expressions - -Variables -========= - -Variables let you give names to values and refer to them later. You -have already seen variables in many of the examples. The name of a -variable must be a sequence of letters, digits and underscores, but -it may not begin with a digit. 
Case is significant in variable -names; `a' and `A' are distinct variables. - -A variable name is a valid expression by itself; it represents the -variable's current value. Variables are given new values with -"assignment operators" and "increment operators". *Note Assignment -Ops::. - -A few variables have special built--in meanings, such as `FS', the -field separator, and `NF', the number of fields in the current input -record. *Note Special::, for a list of them. Special variables can -be used and assigned just like all other variables, but their values -are also used or changed automatically by `awk'. Each special -variable's name is made entirely of upper case letters. - -Variables in `awk' can be assigned either numeric values or string -values. By default, variables are initialized to the null string, -which has the numeric value zero. So there is no need to -``initialize'' each variable explicitly in `awk', the way you would -need to do in C or most other traditional programming languages. - - - -File: gawk-info, Node: Arithmetic Ops, Next: Concatenation, Prev: Variables, Up: Expressions - -Arithmetic Operators -==================== - -The `awk' language uses the common arithmetic operators when -evaluating expressions. All of these arithmetic operators follow -normal precedence rules, and work as you would expect them to. This -example divides field 3 by field 4, adds field 2, stores the result -into field 1, and prints the results: - - awk '{ $1 = $2 + $3 / $4; print }' inventory-shipped - -The arithmetic operators in `awk' are: - -`X + Y' - Addition. - -`X - Y' - Subtraction. - -`- X' - Negation. - -`X / Y' - Division. Since all numbers in `awk' are double--precision - floating point, the result is not rounded to an integer: `3 / 4' - has the value 0.75. - -`X * Y' - Multiplication. - -`X % Y' - Remainder. The quotient is rounded toward zero to an integer, - multiplied by Y and this result is subtracted from X. 
This - operation is sometimes known as ``trunc--mod''. The following - relation always holds: - - `b * int(a / b) + (a % b) == a' - - One undesirable effect of this definition of remainder is that X - % Y is negative if X is negative. Thus, - - -17 % 8 = -1 - -`X ^ Y' -`X ** Y' - Exponentiation: X raised to the Y power. `2 ^ 3' has the value - 8. The character sequence `**' is equivalent to `^'. - - - -File: gawk-info, Node: Concatenation, Next: Comparison Ops, Prev: Arithmetic Ops, Up: Expressions - -String Concatenation -==================== - -There is only one string operation: concatenation. It does not have -a specific operator to represent it. Instead, concatenation is -performed by writing expressions next to one another, with no -operator. For example: - - awk '{ print "Field number one: " $1 }' BBS-list - -produces, for the first record in `BBS-list': - - Field number one: aardvark - -If you hadn't put the space after the `:', the line would have run -together. For example: - - awk '{ print "Field number one:" $1 }' BBS-list - -produces, for the first record in `BBS-list': - - Field number one:aardvark - - - -File: gawk-info, Node: Comparison Ops, Next: Boolean Ops, Prev: Concatenation, Up: Expressions - -Comparison Expressions -====================== - -"Comparison expressions" use "relational operators" to compare -strings or numbers. The relational operators are the same as in C. -Here is a table of them: - -`X < Y' - True if X is less than Y. - -`X <= Y' - True if X is less than or equal to Y. - -`X > Y' - True if X is greater than Y. - -`X >= Y' - True if X is greater than or equal to Y. - -`X == Y' - True if X is equal to Y. - -`X != Y' - True if X is not equal to Y. - -`X ~ REGEXP' - True if regexp REGEXP matches the string X. - -`X !~ REGEXP' - True if regexp REGEXP does not match the string X. - -`SUBSCRIPT in ARRAY' - True if array ARRAY has an element with the subscript SUBSCRIPT. 
- -Comparison expressions have the value 1 if true and 0 if false. - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (*note Conversion::.). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, `"10"' is less than `"9"'. - -For example, - - $1 == "foo" - -has the value of 1, or is true, if the first field of the current -input record is precisely `foo'. By contrast, - - $1 ~ /foo/ - -has the value 1 if the first field contains `foo'. - - - -File: gawk-info, Node: Boolean Ops, Next: Assignment Ops, Prev: Comparison Ops, Up: Expressions - -Boolean Operators -================= - -A boolean expression is a combination of comparison expressions or -matching expressions, using the boolean operators ``or'' (`||'), -``and'' (`&&'), and ``not'' (`!'), along with parentheses to control -nesting. The truth of the boolean expression is computed by -combining the truth values of the component expressions. - -Boolean expressions can be used wherever comparison and matching -expressions can be used. They can be used in `if' and `while' -statements. They have numeric values (1 if true, 0 if false). - -In addition, every boolean expression is also a valid boolean -pattern, so you can use it as a pattern to control the execution of -rules. - -Here are descriptions of the three boolean operators, with an example -of each. It may be instructive to compare these examples with the -analogous examples of boolean patterns (*note Boolean::.), which use -the same boolean operators in patterns instead of expressions. - -`BOOLEAN1 && BOOLEAN2' - True if both BOOLEAN1 and BOOLEAN2 are true. For example, the - following statement prints the current input record if it - contains both `2400' and `foo'. - - if ($0 ~ /2400/ && $0 ~ /foo/) print - - The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is - true. 
This can make a difference when BOOLEAN2 contains - expressions that have side effects: in the case of `$0 ~ /foo/ - && ($2 == bar++)', the variable `bar' is not incremented if - there is no `foo' in the record. - -`BOOLEAN1 || BOOLEAN2' - True if at least one of BOOLEAN1 and BOOLEAN2 is true. For - example, the following command prints all records in the input - file `BBS-list' that contain *either* `2400' or `foo', or both. - - awk '{ if ($0 ~ /2400/ || $0 ~ /foo/) print }' BBS-list - - The subexpression BOOLEAN2 is evaluated only if BOOLEAN1 is - false. This can make a difference when BOOLEAN2 contains - expressions that have side effects. - -`!BOOLEAN' - True if BOOLEAN is false. For example, the following program - prints all records in the input file `BBS-list' that do *not* - contain the string `foo'. - - awk '{ if (! ($0 ~ /foo/)) print }' BBS-list - - - -File: gawk-info, Node: Assignment Ops, Next: Increment Ops, Prev: Boolean Ops, Up: Expressions - -Assignment Operators -==================== - -An "assignment" is an expression that stores a new value into a -variable. For example, let's assign the value 1 to the variable `z': - - z = 1 - -After this expression is executed, the variable `z' has the value 1. -Whatever old value `z' had before the assignment is forgotten. - -The `=' sign is called an "assignment operator". It is the simplest -assignment operator because the value of the right--hand operand is -stored unchanged. - -The left--hand operand of an assignment can be a variable (*note -Variables::.), a field (*note Changing Fields::.) or an array element -(*note Arrays::.). These are all called "lvalues", which means they -can appear on the left side of an assignment operator. The -right--hand operand may be any expression; it produces the new value -which the assignment stores in the specified variable, field or array -element. - -Assignments can store string values also. 
For example, this would -store the value `"this food is good"' in the variable `message': - - thing = "food" - predicate = "good" - message = "this " thing " is " predicate - -(This also illustrates concatenation of strings.) - -It is important to note that variables do *not* have permanent types. -The type of a variable is simply the type of whatever value it -happens to hold at the moment. In the following program fragment, -the variable `foo' has a numeric value at first, and a string value -later on: - - foo = 1 - print foo - foo = "bar" - print foo - -When the second assignment gives `foo' a string value, the fact that -it previously had a numeric value is forgotten. - -An assignment is an expression, so it has a value: the same value -that is assigned. Thus, `z = 1' as an expression has the value 1. -One consequence of this is that you can write multiple assignments -together: - - x = y = z = 0 - -stores the value 0 in all three variables. It does this because the -value of `z = 0', which is 0, is stored into `y', and then the value -of `y = z = 0', which is 0, is stored into `x'. - -You can use an assignment anywhere an expression is called for. For -example, it is valid to write `x != (y = 1)' to set `y' to 1 and then -test whether `x' equals 1. But this style tends to make programs -hard to read; except in a one--shot program, you should rewrite it to -get rid of such nesting of assignments. This is never very hard. - -Aside from `=', there are several other assignment operators that do -arithmetic with the old value of the variable. For example, the -operator `+=' computes a new value by adding the right--hand value to -the old value of the variable. Thus, the following assignment adds 5 -to the value of `foo': - - foo += 5 - -This is precisely equivalent to the following: - - foo = foo + 5 - -Use whichever one makes the meaning of your program clearer. - -Here is a table of the arithmetic assignment operators. 
In each -case, the right--hand operand is an expression whose value is -converted to a number. - -`LVALUE += INCREMENT' - Adds INCREMENT to the value of LVALUE to make the new value of - LVALUE. - -`LVALUE -= DECREMENT' - Subtracts DECREMENT from the value of LVALUE. - -`LVALUE *= COEFFICIENT' - Multiplies the value of LVALUE by COEFFICIENT. - -`LVALUE /= QUOTIENT' - Divides the value of LVALUE by QUOTIENT. - -`LVALUE %= MODULUS' - Sets LVALUE to its remainder by MODULUS. - -`LVALUE ^= POWER' -`LVALUE **= POWER' - Raises LVALUE to the power POWER. - - - -File: gawk-info, Node: Increment Ops, Next: Conversion, Prev: Assignment Ops, Up: Expressions - -Increment Operators -=================== - -"Increment operators" increase or decrease the value of a variable by -1. You could do the same thing with an assignment operator, so the -increment operators add no power to the `awk' language; but they are -convenient abbreviations for something very common. - -The operator to add 1 is written `++'. There are two ways to use -this operator: pre--incrementation and post--incrementation. - -To pre--increment a variable V, write `++V'. This adds 1 to the -value of V and that new value is also the value of this expression. -The assignment expression `V += 1' is completely equivalent. - -Writing the `++' after the variable specifies post--increment. This -increments the variable value just the same; the difference is that -the value of the increment expression itself is the variable's *old* -value. Thus, if `foo' has value 4, then the expression `foo++' has -the value 4, but it changes the value of `foo' to 5. - -The post--increment `foo++' is nearly equivalent to writing `(foo += -1) - 1'. It is not perfectly equivalent because all numbers in `awk' -are floating point: in floating point, `foo + 1 - 1' does not -necessarily equal `foo'. But the difference will be minute as long -as you stick to numbers that are fairly small (less than a trillion). 
- -Any lvalue can be incremented. Fields and array elements are -incremented just like variables. - -The decrement operator `--' works just like `++' except that it -subtracts 1 instead of adding. Like `++', it can be used before the -lvalue to pre--decrement or after it to post--decrement. - -Here is a summary of increment and decrement expressions. - -`++LVALUE' - This expression increments LVALUE and the new value becomes the - value of this expression. - -`LVALUE++' - This expression causes the contents of LVALUE to be incremented. - The value of the expression is the *old* value of LVALUE. - -`--LVALUE' - Like `++LVALUE', but instead of adding, it subtracts. It - decrements LVALUE and delivers the value that results. - -`LVALUE--' - Like `LVALUE++', but instead of adding, it subtracts. It - decrements LVALUE. The value of the expression is the *old* - value of LVALUE. - - - -File: gawk-info, Node: Conversion, Next: Conditional Exp, Prev: Increment Ops, Up: Expressions - -Conversion of Strings and Numbers -================================= - -Strings are converted to numbers, and numbers to strings, if the -context of your `awk' statement demands it. For example, if the -values of `foo' or `bar' in the expression `foo + bar' happen to be -strings, they are converted to numbers before the addition is -performed. If numeric values appear in string concatenation, they -are converted to strings. Consider this: - - two = 2; three = 3 - print (two three) + 4 - -This eventually prints the (numeric) value `27'. The numeric -variables `two' and `three' are converted to strings and concatenated -together, and the resulting string is converted back to a number -before adding `4'. The resulting numeric value `27' is printed. - -If, for some reason, you need to force a number to be converted to a -string, concatenate the null string with that number. To force a -string to be converted to a number, add zero to that string. 
Strings -that can't be interpreted as valid numbers are given the numeric -value zero. - -The exact manner in which numbers are converted into strings is -controlled by the `awk' special variable `OFMT' (*note Special::.). -Numbers are converted using a special version of the `sprintf' -function (*note Built-in::.) with `OFMT' as the format specifier. - -`OFMT''s default value is `"%.6g"', which prints a value with at -most six significant digits. You might want to change it to specify -more precision, if your version of `awk' uses double precision -arithmetic. Double precision on most modern machines gives you 16 or -17 decimal digits of precision. - -Strange results can happen if you set `OFMT' to a string that doesn't -tell `sprintf' how to format floating point numbers in a useful way. -For example, if you forget the `%' in the format, all numbers will be -converted to the same constant string. - - - -File: gawk-info, Node: Conditional Exp, Next: Function Calls, Prev: Conversion, Up: Expressions - -Conditional Expressions -======================= - -A "conditional expression" is a special kind of expression with three -operands. It allows you to use one expression's value to select one -of two other expressions. - -The conditional expression looks the same as in the C language: - - SELECTOR ? IF-TRUE-EXP : IF-FALSE-EXP - -There are three subexpressions. The first, SELECTOR, is always -computed first. If it is ``true'' (not zero) then IF-TRUE-EXP is -computed next and its value becomes the value of the whole expression. -Otherwise, IF-FALSE-EXP is computed next and its value becomes the -value of the whole expression. - -For example, this expression produces the absolute value of `x': - - x > 0 ? x : -x - -Each time the conditional expression is computed, exactly one of -IF-TRUE-EXP and IF-FALSE-EXP is computed; the other is ignored. This -is important when the expressions contain side effects. 
For example, -this conditional expression examines element `i' of either array `a' -or array `b', and increments `i'. - - x == y ? a[i++] : b[i++] - -This is guaranteed to increment `i' exactly once, because each time -one or the other of the two increment expressions will be executed -and the other will not be. - - - -File: gawk-info, Node: Function Calls, Prev: Conditional Exp, Up: Expressions - -Function Calls -============== - -A "function" is a name for a particular calculation. Because it has -a name, you can ask for it by name at any point in the program. For -example, the function `sqrt' computes the square root of a number. - -A fixed set of functions are "built in", which means they are -available in every `awk' program. The `sqrt' function is one of -these. *Note Built-in::, for a list of built--in functions and their -descriptions. In addition, you can define your own functions in the -program for use elsewhere in the same program. *Note User-defined::, -for how to do this. - -The way to use a function is with a "function call" expression, which -consists of the function name followed by a list of "arguments" in -parentheses. The arguments are expressions which give the raw -materials for the calculation that the function will do. When there -is more than one argument, they are separated by commas. If there -are no arguments, write just `()' after the function name. - -*Do not put any space between the function name and the -open--parenthesis!* A user--defined function name looks just like -the name of a variable, and space would make the expression look like -concatenation of a variable with an expression inside parentheses. -Space before the parenthesis is harmless with built--in functions, -but it is best not to get into the habit of using space, lest you do -likewise for a user--defined function one day by mistake. - -Each function needs a particular number of arguments. 
For example, -the `sqrt' function must be called with a single argument, like this: - - sqrt(ARGUMENT) - -The argument is the number to take the square root of. - -Some of the built--in functions allow you to omit the final argument. -If you do so, they will use a reasonable default. *Note Built-in::, -for full details. If arguments are omitted in calls to user--defined -functions, then those arguments are treated as local variables, -initialized to the null string (*note User-defined::.). - -Like every other expression, the function call has a value, which is -computed by the function based on the arguments you give it. In this -example, the value of `sqrt(ARGUMENT)' is the square root of the -argument. A function can also have side effects, such as assigning -the values of certain variables or doing I/O. - -Here is a command to read numbers, one number per line, and print the -square root of each one: - - awk '{ print "The square root of", $1, "is", sqrt($1) }' - - - -File: gawk-info, Node: Statements, Next: Arrays, Prev: Expressions, Up: Top - -Actions: Statements -******************* - -"Control statements" such as `if', `while', and so on control the -flow of execution in `awk' programs. Most of the control statements -in `awk' are patterned on similar statements in C. - -The simplest kind of statement is an expression. The other kinds of -statements start with special keywords such as `if' and `while', to -distinguish them from simple expressions. - -In all the examples in this chapter, BODY can be either a single -statement or a group of statements. Groups of statements are -enclosed in braces, and separated by newlines or semicolons. - -* Menu: - -* Expressions:: One kind of statement simply computes an expression. - -* If:: Conditionally execute some `awk' statements. - -* While:: Loop until some condition is satisfied. - -* Do:: Do specified action while looping until some - condition is satisfied. 
- -* For:: Another looping statement, that provides - initialization and increment clauses. - -* Break:: Immediately exit the innermost enclosing loop. - -* Continue:: Skip to the end of the innermost enclosing loop. - -* Next:: Stop processing the current input record. - -* Exit:: Stop execution of `awk'. - - - -File: gawk-info, Node: If, Next: While, Up: Statements - -The `if' Statement -================== - -The `if'-`else' statement is `awk''s decision--making statement. The -`else' part of the statement is optional. - - `if (CONDITION) BODY1 else BODY2' - -Here CONDITION is an expression that controls what the rest of the -statement will do. If CONDITION is true, BODY1 is executed; -otherwise, BODY2 is executed (assuming that the `else' clause is -present). The condition is considered true if it is nonzero or -nonnull. - -Here is an example: - - awk '{ if (x % 2 == 0) - print "x is even" - else - print "x is odd" }' - -In this example, if the statement containing `x' is found to be true -(that is, x is divisible by 2), then the first `print' statement is -executed, otherwise the second `print' statement is performed. - -If the `else' appears on the same line as BODY1, and BODY1 is a -single statement, then a semicolon must separate BODY1 from `else'. -To illustrate this, let's rewrite the previous example: - - awk '{ if (x % 2 == 0) print "x is even"; else - print "x is odd" }' - -If you forget the `;', `awk' won't be able to parse it, and you will -get a syntax error. - -We would not actually write this example this way, because a human -reader might fail to see the `else' if it were not the first thing on -its line. - - - -File: gawk-info, Node: While, Next: Do, Prev: If, Up: Statements - -The `while' Statement -===================== - -In programming, a loop means a part of a program that is (or at least -can be) executed two or more times in succession. - -The `while' statement is the simplest looping statement in `awk'. 
It -repeatedly executes a statement as long as a condition is true. It -looks like this: - - while (CONDITION) - BODY - -Here BODY is a statement that we call the "body" of the loop, and -CONDITION is an expression that controls how long the loop keeps -running. - -The first thing the `while' statement does is test CONDITION. If -CONDITION is true, it executes the statement BODY. After BODY has -been executed, CONDITION is tested again and this process is repeated -until CONDITION is no longer true. If CONDITION is initially false, -the body of the loop is never executed. - - awk '{ i = 1 - while (i <= 3) { - print $i - i++ - } - }' - -This example prints the first three input fields, one per line. - -The loop works like this: first, the value of `i' is set to 1. Then, -the `while' tests whether `i' is less than or equal to three. This -is the case when `i' equals one, so the `i'-th field is printed. -Then the `i++' increments the value of `i' and the loop repeats. - -When `i' reaches 4, the loop exits. Here BODY is a compound -statement enclosed in braces. As you can see, a newline is not -required between the condition and the body; but using one makes the -program clearer unless the body is a compound statement or is very -simple. - - - -File: gawk-info, Node: Do, Next: For, Prev: While, Up: Statements - -The `do'--`while' Statement -=========================== - -The `do' loop is a variation of the `while' looping statement. The -`do' loop executes the BODY once, then repeats BODY as long as -CONDITION is true. It looks like this: - - do - BODY - while (CONDITION) - -Even if CONDITION is false at the start, BODY is executed at least -once (and only once, unless executing BODY makes CONDITION true). -Contrast this with the corresponding `while' statement: - - while (CONDITION) - BODY - -This statement will not execute BODY even once if CONDITION is false -to begin with. 
- -Here is an example of a `do' statement: - - awk '{ i = 1 - do { - print $0 - i++ - } while (i <= 10) - }' - -prints each input record ten times. It isn't a very realistic -example, since in this case an ordinary `while' would do just as -well. But this is normal; there is only occasionally a real use for -a `do' statement. - - diff --git a/gawk-info-4 b/gawk-info-4 deleted file mode 100644 index c8e9b7ee..00000000 --- a/gawk-info-4 +++ /dev/null @@ -1,1400 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: For, Next: Break, Prev: Do, Up: Statements - -The `for' Statement -=================== - -The `for' statement makes it more convenient to count iterations of a -loop. The general form of the `for' statement looks like this: - - for (INITIALIZATION; CONDITION; INCREMENT) - BODY - -This statement starts by executing INITIALIZATION. Then, as long as -CONDITION is true, it repeatedly executes BODY and then INCREMENT. 
-Typically INITIALIZATION sets a variable to either zero or one, -INCREMENT adds 1 to it, and CONDITION compares it against the desired -number of iterations. - -Here is an example of a `for' statement: - - awk '{ for (i = 1; i <= 3; i++) - print $i - }' - -This prints the first three fields of each input record, one field -per line. - -In the `for' statement, BODY stands for any statement, but -INITIALIZATION, CONDITION and INCREMENT are just expressions. You -cannot set more than one variable in the INITIALIZATION part unless -you use a multiple assignment statement such as `x = y = 0', which is -possible only if all the initial values are equal. (But you can -initialize additional variables by writing their assignments as -separate statements preceding the `for' loop.) - -The same is true of the INCREMENT part; to increment additional -variables, you must write separate statements at the end of the loop. -The C compound expression, using C's comma operator, would be useful -in this context, but it is not supported in `awk'. - -Most often, INCREMENT is an increment expression, as in the example -above. But this is not required; it can be any expression whatever. -For example, this statement prints odd numbers from 1 to 100: - - # print odd numbers from 1 to 100 - for (i = 1; i <= 100; i += 2) - print i - -Any of the three expressions following `for' may be omitted if you -don't want it to do anything. Thus, `for (;x > 0;)' is equivalent to -`while (x > 0)'. If the CONDITION part is empty, it is treated as -TRUE, effectively yielding an infinite loop. - -In most cases, a `for' loop is an abbreviation for a `while' loop, as -shown here: - - INITIALIZATION - while (CONDITION) { - BODY - INCREMENT - } - -(The only exception is when the `continue' statement (*note -Continue::.) is used inside the loop; changing a `for' statement to a -`while' statement in this way can change the effect of the `continue' -statement inside the loop.) 
- -The `awk' language has a `for' statement in addition to a `while' -statement because often a `for' loop is both less work to type and -more natural to think of. Counting the number of iterations is very -common in loops. It can be easier to think of this counting as part -of looping rather than as something to do inside the loop. - -The next section has more complicated examples of `for' loops. - -There is an alternate version of the `for' loop, for iterating over -all the indices of an array: - - for (i in array) - PROCESS array[i] - -*Note Arrays::, for more information on this version of the `for' loop. - - - -File: gawk-info, Node: Break, Next: Continue, Prev: For, Up: Statements - -The `break' Statement -===================== - -The `break' statement jumps out of the innermost `for', `while', or -`do'--`while' loop that encloses it. The following example finds the -smallest divisor of any number, and also identifies prime numbers: - - awk '# find smallest divisor of num - { num = $1 - for (div = 2; div*div <= num; div++) - if (num % div == 0) - break - if (num % div == 0) - printf "Smallest divisor of %d is %d\n", num, div - else - printf "%d is prime\n", num }' - -When the remainder is zero in the first `if' statement, `awk' -immediately "breaks" out of the containing `for' loop. This means -that `awk' proceeds immediately to the statement following the loop -and continues processing. (This is very different from the `exit' -statement (*note Exit::.) which stops the entire `awk' program.) - -Here is another program equivalent to the previous one. 
It -illustrates how the CONDITION of a `for' or `while' could just as -well be replaced with a `break' inside an `if': - - awk '# find smallest divisor of num - { num = $1 - for (div = 2; ; div++) { - if (num % div == 0) { - printf "Smallest divisor of %d is %d\n", num, div - break - } - if (div*div > num) { - printf "%d is prime\n", num - break - } - } - }' - - - -File: gawk-info, Node: Continue, Next: Next, Prev: Break, Up: Statements - -The `continue' Statement -======================== - -The `continue' statement, like `break', is used only inside `for', -`while', and `do'--`while' loops. It skips over the rest of the loop -body, causing the next cycle around the loop to begin immediately. -Contrast this with `break', which jumps out of the loop altogether. -Here is an example: - - # print names that don't contain the string "ignore" - - # first, save the text of each line - { names[NR] = $0 } - - # print what we're interested in - END { - for (x in names) { - if (names[x] ~ /ignore/) - continue - print names[x] - } - } - -If any of the input records contain the string `ignore', this example -skips the print statement and continues back to the first statement -in the loop. - -This isn't a practical example of `continue', since it would be just -as easy to write the loop like this: - - for (x in names) - if (names[x] !~ /ignore/) - print names[x] - -The `continue' statement causes `awk' to skip the rest of what is -inside a `for' loop, but it resumes execution with the increment part -of the `for' loop. The following program illustrates this fact: - - awk 'BEGIN { - for (x = 0; x <= 20; x++) { - if (x == 5) - continue - printf ("%d ", x) - } - print "" - }' - -This program prints all the numbers from 0 to 20, except for 5, for -which the `printf' is skipped. Since the increment `x++' is not -skipped, `x' does not remain stuck at 5. 
- - - -File: gawk-info, Node: Next, Next: Exit, Prev: Continue, Up: Statements - -The `next' Statement -==================== - -The `next' statement forces `awk' to immediately stop processing the -current record and go on to the next record. This means that no -further rules are executed for the current record. The rest of the -current rule's action is not executed either. - -Contrast this with the effect of the `getline' function (*note -Getline::.). That too causes `awk' to read the next record -immediately, but it does not alter the flow of control in any way. -So the rest of the current action executes with a new input record. - -At the grossest level, `awk' program execution is a loop that reads -an input record and then tests each rule pattern against it. If you -think of this loop as a `for' statement whose body contains the -rules, then the `next' statement is analogous to a `continue' -statement: it skips to the end of the body of the loop, and executes -the increment (which reads another record). - -For example, if your `awk' program works only on records with four -fields, and you don't want it to fail when given bad input, you might -use the following rule near the beginning of the program: - - NF != 4 { - printf ("line %d skipped: doesn't have 4 fields", FNR) > "/dev/tty" - next - } - -so that the following rules will not see the bad record. The error -message is redirected to `/dev/tty' (the terminal), so that it won't -get lost amid the rest of the program's regular output. - - - -File: gawk-info, Node: Exit, Prev: Next, Up: Statements - -The `exit' Statement -==================== - -The `exit' statement causes `awk' to immediately stop executing the -current rule and to stop processing input; any remaining input is -ignored. - -If an `exit' statement is executed from a `BEGIN' rule the program -stops processing everything immediately. No input records will be -read. However, if an `END' rule is present, it will be executed -(*note BEGIN/END::.). 
- -If `exit' is used as part of an `END' rule, it causes the program to -stop immediately. - -An `exit' statement that is part of an ordinary rule (that is, not part -of a `BEGIN' or `END' rule) stops the execution of any further -automatic rules, but the `END' rule is executed if there is one. If -you don't want the `END' rule to do its job in this case, you can set -a variable to nonzero before the `exit' statement, and check that -variable in the `END' rule. - -If an argument is supplied to `exit', its value is used as the exit -status code for the `awk' process. If no argument is supplied, -`exit' returns status zero (success). - -For example, let's say you've discovered an error condition you -really don't know how to handle. Conventionally, programs report -this by exiting with a nonzero status. Your `awk' program can do -this using an `exit' statement with a nonzero argument. Here's an -example of this: - - BEGIN { - if (("date" | getline date_now) < 0) { - print "Can't get system date" - exit 4 - } - } - - - -File: gawk-info, Node: Arrays, Next: Built-in, Prev: Statements, Up: Top - -Actions: Using Arrays in `awk' -****************************** - -An "array" is a table of various values, called "elements". The -elements of an array are distinguished by their "indices". Names of -arrays in `awk' are strings of alphanumeric characters and -underscores, just like regular variables. - -You cannot use the same identifier as both a variable and as an array -name in one `awk' program. - -* Menu: - -* Intro: Array Intro. Basic facts about arrays in `awk'. -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Example: Array Example. Sample program explained. - -* Scanning an Array:: A variation of the `for' statement. It loops - through the indices of an array's existing elements. - -* Delete:: The `delete' statement removes an element from an array. 
- -* Multi-dimensional:: Emulating multi--dimensional arrays in `awk'. -* Multi-scanning:: Scanning multi--dimensional arrays. - - - -File: gawk-info, Node: Array Intro, Next: Reference to Elements, Up: Arrays - -Introduction to Arrays -====================== - -The `awk' language has one--dimensional "arrays" for storing groups -of related strings or numbers. Each array must have a name; valid -array names are the same as valid variable names, and they do -conflict with variable names: you can't have both an array and a -variable with the same name at any point in an `awk' program. - -Arrays in `awk' superficially resemble arrays in other programming -languages; but there are fundamental differences. In `awk', you -don't need to declare the size of an array before you start to use it. -What's more, in `awk' any number or even a string may be used as an -array index. - -In most other languages, you have to "declare" an array and specify -how many elements or components it has. In such languages, the -declaration causes a contiguous block of memory to be allocated for -that many elements. An index in the array must be a positive -integer; for example, the index 0 specifies the first element in the -array, which is actually stored at the beginning of the block of -memory. Index 1 specifies the second element, which is stored in -memory right after the first element, and so on. It is impossible to -add more elements to the array, because it has room for only as many -elements as you declared. (Some languages have arrays whose first -index is 1, others require that you specify both the first and last -index when you declare the array. In such a language, an array could -be indexed, for example, from -3 to 17.) 
A contiguous array of four -elements might look like this, conceptually, if the element values -are 8, `"foo"', `""' and 30: - - +--------+--------+-------+--------+ - | 8 | "foo" | "" | 30 | value - +--------+--------+-------+--------+ - 0 1 2 3 index - -Only the values are stored; the indices are implicit from the order -of the values. 8 is the value at index 0, because 8 appears in the -position with 0 elements before it. - -Arrays in `awk' are different: they are "associative". This means -that each array is a collection of pairs: an index, and its -corresponding array element value: - - Element 4 Value 30 - Element 2 Value "foo" - Element 1 Value 8 - Element 3 Value "" - -We have shown the pairs in jumbled order because their order doesn't -mean anything. - -One advantage of an associative array is that new pairs can be added -at any time. For example, suppose we add to that array a tenth -element whose value is `"number ten"'. The result is this: - - Element 10 Value "number ten" - Element 4 Value 30 - Element 2 Value "foo" - Element 1 Value 8 - Element 3 Value "" - -Now the array is "sparse" (i.e. some indices are missing): it has -elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or 9. - -Another consequence of associative arrays is that the indices don't -have to be positive integers. Any number, or even a string, can be -an index. For example, here is an array which translates words from -English into French: - - Element "dog" Value "chien" - Element "cat" Value "chat" - Element "one" Value "un" - Element 1 Value "un" - -Here we decided to translate the number 1 in both spelled--out and -numeral form--thus illustrating that a single array can have both -numbers and strings as indices. - -When `awk' creates an array for you, e.g. with the `split' built--in -function (*note String Functions::.), that array's indices start at -the number one. 
- - - -File: gawk-info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Arrays - -Referring to an Array Element -============================= - -The principal way of using an array is to refer to one of its elements. -An array reference is an expression which looks like this: - - ARRAY[INDEX] - -Here ARRAY is the name of an array. The expression INDEX is the -index of the element of the array that you want. The value of the -array reference is the current value of that array element. - -For example, `foo[4.3]' is an expression for the element of array -`foo' at index 4.3. - -If you refer to an array element that has no recorded value, the -value of the reference is `""', the null string. This includes -elements to which you have not assigned any value, and elements that -have been deleted (*note Delete::.). Such a reference automatically -creates that array element, with the null string as its value. (In -some cases, this is unfortunate, because it might waste memory inside -`awk'). - -You can find out if an element exists in an array at a certain index -with the expression: - - INDEX in ARRAY - -This expression tests whether or not the particular index exists, -without the side effect of creating that element if it is not present. -The expression has the value 1 (true) if `ARRAY[INDEX]' exists, -and 0 (false) if it does not exist. - -For example, to find out whether the array `frequencies' contains the -subscript `"2"', you would ask: - - if ("2" in frequencies) print "Subscript \"2\" is present." - -Note that this is *not* a test of whether or not the array -`frequencies' contains an element whose *value* is `"2"'. (There is -no way to do that except to scan all the elements.) Also, this *does -not* create `frequencies["2"]', while the following (incorrect) -alternative would: - - if (frequencies["2"] != "") print "Subscript \"2\" is present." 
- - - -File: gawk-info, Node: Assigning Elements, Next: Array Example, Prev: Reference to Elements, Up: Arrays - -Assigning Array Elements -======================== - -Array elements are lvalues: they can be assigned values just like -`awk' variables: - - ARRAY[SUBSCRIPT] = VALUE - -Here ARRAY is the name of your array. The expression SUBSCRIPT is -the index of the element of the array that you want to assign a -value. The expression VALUE is the value you are assigning to that -element of the array. - - - -File: gawk-info, Node: Array Example, Next: Scanning an Array, Prev: Assigning Elements, Up: Arrays - -Basic Example of an Array -========================= - -The following program takes a list of lines, each beginning with a -line number, and prints them out in order of line number. The line -numbers are not in order, however, when they are first read: they -are scrambled. This program sorts the lines by making an array using -the line numbers as subscripts. It then prints out the lines in -sorted order of their numbers. It is a very simple program, and will -get confused if it encounters repeated numbers, gaps, or lines that -don't begin with a number. - - BEGIN { - max=0 - } - - { - if ($1 > max) - max = $1 - arr[$1] = $0 - } - - END { - for (x = 1; x <= max; x++) - print arr[x] - } - -The first rule just initializes the variable `max'. (This is not -strictly necessary, since an uninitialized variable has the null -string as its value, and the null string is effectively zero when -used in a context where a number is required.) - -The second rule keeps track of the largest line number seen so far; -it also stores each line into the array `arr', at an index that is -the line's number. - -The third rule runs after all the input has been read, to print out -all the lines. - -When this program is run with the following input: - - 5 I am the Five man - 2 Who are you? The new number two! - 4 . . . And four on the floor - 1 Who is number one? - 3 I three you. 
- - its output is this: - - 1 Who is number one? - 2 Who are you? The new number two! - 3 I three you. - 4 . . . And four on the floor - 5 I am the Five man - - - -File: gawk-info, Node: Scanning an Array, Next: Delete, Prev: Array Example, Up: Arrays - -Scanning All Elements of an Array -================================= - -In programs that use arrays, often you need a loop that will execute -once for each element of an array. In other languages, where arrays -are contiguous and indices are limited to positive integers, this is -easy: the largest index is one less than the length of the array, and -you can find all the valid indices by counting from zero up to that -value. This technique won't do the job in `awk', since any number or -string may be an array index. So `awk' has a special kind of `for' -statement for scanning an array: - - for (VAR in ARRAY) - BODY - -This loop executes BODY once for each different value that your -program has previously used as an index in ARRAY, with the variable -VAR set to that index. - -Here is a program that uses this form of the `for' statement. The -first rule scans the input records and notes which words appear (at -least once) in the input, by storing a 1 into the array `used' with -the word as index. The second rule scans the elements of `used' to -find all the distinct words that appear in the input. It prints each -word that is more than 10 characters long, and also prints the number -of such words. *Note Built-in::, for more information on the -built--in function `length'. - - # Record a 1 for each word that is used at least once. - { - for (i = 1; i <= NF; i++) - used[$i] = 1 - } - - # Find number of distinct words more than 10 characters long. - END { - num_long_words = 0 - for (x in used) - if (length(x) > 10) { - ++num_long_words - print x - } - print num_long_words, "words longer than 10 characters" - } - -*Note Sample Program::, for a more detailed example of this type. 
- -The order in which elements of the array are accessed by this -statement is determined by the internal arrangement of the array -elements within `awk' and cannot be controlled or changed. This can -lead to problems if new elements are added to ARRAY by statements in -BODY; you cannot predict whether or not the `for' loop will reach -them. Similarly, changing VAR inside the loop can produce strange -results. It is best to avoid such things. - - - -File: gawk-info, Node: Delete, Next: Multi-dimensional, Prev: Scanning an Array, Up: Arrays - -The `delete' Statement -====================== - -You can remove an individual element of an array using the `delete' -statement: - - delete ARRAY[INDEX] - -When an array element is deleted, it is as if you had never referred -to it and had never given it any value. Any value the element -formerly had can no longer be obtained. - -Here is an example of deleting elements in an array: - - awk '{ for (i in frequencies) - delete frequencies[i] - }' - -This example removes all the elements from the array `frequencies'. - -If you delete an element, the `for' statement to scan the array will -not report that element, and the `in' operator to check for the -presence of that element will return 0: - - delete foo[4] - if (4 in foo) - print "This will never be printed" - - - -File: gawk-info, Node: Multi-dimensional, Next: Multi-scanning, Prev: Delete, Up: Arrays - -Multi--dimensional arrays -========================= - -A multi--dimensional array is an array in which an element is -identified by a sequence of indices, not a single index. For -example, a two--dimensional array requires two indices. The usual -way (in most languages, including `awk') to refer to an element of a -two--dimensional array named `grid' is with `grid[x,y]'. - -Multi--dimensional arrays are supported in `awk' through -concatenation of indices into one string. What happens is that `awk' -converts the indices into strings (*note Conversion::.) 
and -concatenates them together, with a separator between them. This -creates a single string that describes the values of the separate -indices. The combined string is used as a single index into an -ordinary, one--dimensional array. The separator used is the value of -the special variable `SUBSEP'. - -For example, suppose the value of `SUBSEP' is `","' and the -expression `foo[5,12]="value"' is executed. The numbers 5 and 12 -will be concatenated with a comma between them, yielding `"5,12"'; -thus, the array element `foo["5,12"]' will be set to `"value"'. - -Once the element's value is stored, `awk' has no record of whether it -was stored with a single index or a sequence of indices. The two -expressions `foo[5,12]' and `foo[5 SUBSEP 12]' always have the same -value. - -The default value of `SUBSEP' is not a comma; it is the string -`"\034"', which contains a nonprinting character that is unlikely to -appear in an `awk' program or in the input data. - -The usefulness of choosing an unlikely character comes from the fact -that index values that contain a string matching `SUBSEP' lead to -combined strings that are ambiguous. Suppose that `SUBSEP' is a -comma; then `foo["a,b", "c"]' and `foo["a", "b,c"]' will be -indistinguishable because both are actually stored as `foo["a,b,c"]'. -Because `SUBSEP' is `"\034"', such confusion can actually happen only -when an index contains the character `"\034"', which is a rare event. - -You can test whether a particular index--sequence exists in a -``multi--dimensional'' array with the same operator `in' used for -single dimensional arrays. Instead of a single index as the -left--hand operand, write the whole sequence of indices, separated by -commas, in parentheses: - - (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY - -The following example treats its input as a two--dimensional array of -fields; it rotates this array 90 degrees clockwise and prints the -result. It assumes that all lines have the same number of elements. 
- - awk 'BEGIN { - max_nf = max_nr = 0 - } - - { - if (max_nf < NF) - max_nf = NF - max_nr = NR - for (x = 1; x <= NF; x++) - vector[x, NR] = $x - } - - END { - for (x = 1; x <= max_nf; x++) { - for (y = max_nr; y >= 1; --y) - printf("%s ", vector[x, y]) - printf("\n") - } - }' - -When given the input: - - 1 2 3 4 5 6 - 2 3 4 5 6 1 - 3 4 5 6 1 2 - 4 5 6 1 2 3 - -it produces: - - 4 3 2 1 - 5 4 3 2 - 6 5 4 3 - 1 6 5 4 - 2 1 6 5 - 3 2 1 6 - - - -File: gawk-info, Node: Multi-scanning, Prev: Multi-dimensional, Up: Arrays - -Scanning Multi--dimensional Arrays -================================== - -There is no special `for' statement for scanning a -``multi--dimensional'' array; there cannot be one, because in truth -there are no multi--dimensional arrays or elements; there is only a -multi--dimensional *way of accessing* an array. - -However, if your program has an array that is always accessed as -multi--dimensional, you can get the effect of scanning it by -combining the scanning `for' statement (*note Scanning an Array::.) -with the `split' built--in function (*note String Functions::.). It -works like this: - - for (combined in ARRAY) { - split (combined, separate, SUBSEP) - ... - } - -This finds each concatenated, combined index in the array, and splits -it into the individual indices by breaking it apart where the value -of `SUBSEP' appears. The split--out indices become the elements of -the array `separate'. - -Thus, suppose you have previously stored in `ARRAY[1, "foo"]'; then -an element with index `"1\034foo"' exists in ARRAY. (Recall that the -default value of `SUBSEP' contains the character with code 034.) -Sooner or later the `for' statement will find that index and do an -iteration with `combined' set to `"1\034foo"'. Then the `split' -function will be called as follows: - - split ("1\034foo", separate, "\034") - -The result of this is to set `separate[1]' to 1 and `separate[2]' to -`"foo"'. 
Presto, the original sequence of separate indices has been -recovered. - - - -File: gawk-info, Node: Built-in, Next: User-defined, Prev: Arrays, Up: Top - -Built--in functions -******************* - -"Built--in" functions are functions always available for your `awk' -program to call. This chapter defines all the built--in functions -that exist; some of them are mentioned in other sections, but they -are summarized here for your convenience. (You can also define new -functions yourself. *Note User-defined::.) - -In most cases, any extra arguments given to built--in functions are -ignored. The defaults for omitted arguments vary from function to -function and are described under the individual functions. - -The name of a built--in function need not be followed immediately by -the opening left parenthesis of the arguments; whitespace is allowed. -However, it is wise to write no space there, since user--defined -functions do not allow space. - -When a function is called, expressions that create the function's -actual parameters are evaluated completely before the function call -is performed. For example, in the code fragment: - - i = 4 - j = myfunc(i++) - -the variable `i' will be set to 5 before `myfunc' is called with a -value of 4 for its actual parameter. - -* Menu: - -* Numeric Functions:: Functions that work with numbers, - including `int', `sin' and `rand'. - -* String Functions:: Functions for string manipulation, - such as `split', `match', and `sprintf'. - -* I/O Functions:: Functions for files and shell commands - - - -File: gawk-info, Node: Numeric Functions, Next: String Functions, Up: Built-in - -Numeric Built--in Functions -=========================== - -The general syntax of the numeric built--in functions is the same for -each. Here is an example of that syntax: - - awk '# Read input records containing a pair of points: x0, y0, x1, y1. - # Print the points and the distance between them. 
- { printf "%f %f %f %f %f\n", $1, $2, $3, $4, - sqrt(($3-$1) * ($3-$1) + ($4-$2) * ($4-$2)) }' - -This calculates the square root of a calculation that uses the values -of the fields. It then prints the first four fields of the input -record and the result of the square root calculation. - -Here is the full list of numeric built--in functions: - -`int(X)' - This gives you the integer part of X, truncated toward 0. This - produces the nearest integer to X, located between X and 0. - - For example, `int(3)' is 3, `int(3.9)' is 3, `int(-3.9)' is -3, - and `int(-3)' is -3 as well. - -`sqrt(X)' - This gives you the positive square root of X. It reports an - error if X is negative. - -`exp(X)' - This gives you the exponential of X, or reports an error if X is - out of range. The range of values X can have depends on your - machine's floating point representation. - -`log(X)' - This gives you the natural logarithm of X, if X is positive; - otherwise, it reports an error. - -`sin(X)' - This gives you the sine of X, with X in radians. - -`cos(X)' - This gives you the cosine of X, with X in radians. - -`atan2(Y, X)' - This gives you the arctangent of Y/X, with the result in radians. - -`rand()' - This gives you a random number. The values of `rand()' are - uniformly--distributed between 0 and 1. The value is never 0 - and never 1. - - Often you want random integers instead. Here is a user--defined - function you can use to obtain a random nonnegative integer less - than N: - - function randint(n) { - return int(n * rand()) - } - - The multiplication produces a random real number at least 0, and - less than N. We then make it an integer (using `int') between 0 - and `N-1'. - - Here is an example where a similar function is used to produce - random integers between 1 and N: - - awk ' - # Function to roll a simulated die. - function roll(n) { return 1 + int(rand() * n) } - - # Roll 3 six--sided dice and print total number of points.
- { - printf("%d points\n", roll(6)+roll(6)+roll(6)) - }' - - *Note* that `rand()' starts generating numbers from the same - point, or "seed", each time you run `awk'. This means that the - same program will produce the same results each time you run it. - The numbers are random within one `awk' run, but predictable - from run to run. This is convenient for debugging, but if you - want a program to do different things each time it is used, you - must change the seed to a value that will be different in each - run. To do this, use `srand'. - -`srand(X)' - The function `srand(X)' sets the starting point, or "seed", for - generating random numbers to the value X. - - Each seed value leads to a particular sequence of ``random'' - numbers. Thus, if you set the seed to the same value a second - time, you will get the same sequence of ``random'' numbers again. - - If you omit the argument X, as in `srand()', then the current - date and time of day are used for a seed. This is the way to - get random numbers that are truly unpredictable. - - The return value of `srand()' is the previous seed. This makes - it easy to keep track of the seeds for use in consistently - reproducing sequences of random numbers. - - - -File: gawk-info, Node: String Functions, Next: I/O Functions, Prev: Numeric Functions, Up: Built-in - -Built--in Functions for String Manipulation -=========================================== - -`index(IN, FIND)' - This searches the string IN for the first occurrence of the - string FIND, and returns the position where that occurrence - begins in the string IN. For example: - - awk 'BEGIN { print index("peanut", "an") }' - - prints `3'. If FIND is not found, `index' returns 0. - -`length(STRING)' - This gives you the number of characters in STRING. If STRING is - a number, the length of the digit string representing that - number is returned. For example, `length("abcde")' is 5. - Whereas, `length(15 * 35)' works out to 3. How? 
Well, 15 * 35 - = 525, and 525 is then converted to the string `"525"', which - has three characters. - -`match(STRING, REGEXP)' - The `match' function searches the string, STRING, for the - longest, leftmost substring matched by the regular expression, - REGEXP. It returns the character position, or "index", of where - that substring begins (1, if it starts at the beginning of - STRING). If no match is found, it returns 0. - - The `match' function sets the special variable `RSTART' to the - index. It also sets the special variable `RLENGTH' to the - length of the matched substring. If no match is found, `RSTART' - is set to 0, and `RLENGTH' to -1. - - For example: - - awk '{ - if ($1 == "FIND") - regex = $2 - else { - where = match($0, regex) - if (where) - print "Match of", regex, "found at", where, "in", $0 - } - }' - - This program looks for lines that match the regular expression - stored in the variable `regex'. This regular expression can be - changed. If the first word on a line is `FIND', `regex' is - changed to be the second word on that line. Therefore, given: - - FIND fo*bar - My program was a foobar - But none of it would doobar - FIND Melvin - JF+KM - This line is property of The Reality Engineering Co. - This file was created by Melvin. - - `awk' prints: - - Match of fo*bar found at 18 in My program was a foobar - Match of Melvin found at 26 in This file was created by Melvin. - -`split(STRING, ARRAY, FIELD_SEPARATOR)' - This divides STRING up into pieces separated by FIELD_SEPARATOR, - and stores the pieces in ARRAY. The first piece is stored in - `ARRAY[1]', the second piece in `ARRAY[2]', and so forth. The - string value of the third argument, FIELD_SEPARATOR, is used as - a regexp to search for to find the places to split STRING. If - the FIELD_SEPARATOR is omitted, the value of `FS' is used. - `split' returns the number of elements created.
- - The `split' function, then, splits strings into pieces in a - manner similar to the way input lines are split into fields. - For example: - - split("auto-da-fe", a, "-") - - splits the string `auto-da-fe' into three fields using `-' as - the separator. It sets the contents of the array `a' as follows: - - a[1] = "auto" - a[2] = "da" - a[3] = "fe" - - The value returned by this call to `split' is 3. - -`sprintf(FORMAT, EXPRESSION1,...)' - This returns (without printing) the string that `printf' would - have printed out with the same arguments (*note Printf::.). For - example: - - sprintf("pi = %.2f (approx.)", 22/7) - - returns the string `"pi = 3.14 (approx.)"'. - -`sub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)' - The `sub' function alters the value of TARGET_VARIABLE. It - searches this value, which should be a string, for the leftmost - substring matched by the regular expression, REGEXP, extending - this match as far as possible. Then the entire string is - changed by replacing the matched text with REPLACEMENT_STRING. - The modified string becomes the new value of TARGET_VARIABLE. - - This function is peculiar because TARGET_VARIABLE is not simply - used to compute a value, and not just any expression will do: it - must be a variable, field or array reference, so that `sub' can - store a modified value there. If this argument is omitted, then - the default is to use and alter `$0'. - - For example: - - str = "water, water, everywhere" - sub(/at/, "ith", str) - - sets `str' to `"wither, water, everywhere"', by replacing the - leftmost, longest occurrence of `at' with `ith'. - - The `sub' function returns the number of substitutions made - (either one or zero). - - The special character, `&', in the replacement string, - REPLACEMENT_STRING, stands for the precise substring that was - matched by REGEXP. (If the regexp can match more than one - string, then this precise substring may vary.) 
For example: - - awk '{ sub(/candidate/, "& and his wife"); print }' - - will change the first occurrence of ``candidate'' to ``candidate - and his wife'' on each input line. - - The effect of this special character can be turned off by - preceding it with a backslash (`\&'). To include a backslash in - the replacement string, it too must be preceded with a (second) - backslash. - - Note: if you use `sub' with a third argument that is not a - variable, field or array element reference, then it will still - search for the pattern and return 0 or 1, but the modified - string is thrown away because there is no place to put it. For - example: - - sub(/USA/, "United States", "the USA and Canada") - - will indeed produce a string `"the United States and Canada"', - but there will be no way to use that string! - -`gsub(REGEXP, REPLACEMENT_STRING, TARGET_VARIABLE)' - This is similar to the `sub' function, except `gsub' replaces - *all* of the longest, leftmost, *non--overlapping* matching - substrings it can find. The ``g'' in `gsub' stands for - "global", which means replace *everywhere*. For example: - - awk '{ gsub(/Britain/, "United Kingdom"); print }' - - replaces all occurrences of the string `Britain' with `United - Kingdom' for all input records. - - The `gsub' function returns the number of substitutions made. - If the variable to be searched and altered, TARGET_VARIABLE, is - omitted, then the entire input record, `$0', is used. - - The characters `&' and `\' are special in `gsub' as they are in - `sub' (see immediately above). - -`substr(STRING, START, LENGTH)' - This returns a LENGTH--character--long substring of STRING, - starting at character number START. The first character of a - string is character number one. For example, - `substr("washington", 5, 3)' returns `"ing"'. - - If LENGTH is not present, this function returns the whole suffix - of STRING that begins at character number START. For example, - `substr("washington", 5)' returns `"ington"'. 
- - - -File: gawk-info, Node: I/O Functions, Prev: String Functions, Up: Built-in - -Built--in Functions for I/O to Files and Commands -================================================= - -`close(FILENAME)' - Close the file FILENAME. The argument may alternatively be a - shell command that was used for redirecting to or from a pipe; - then the pipe is closed. - - *Note Close Input::, regarding closing input files and pipes. - *Note Close Output::, regarding closing output files and pipes. - -`system(COMMAND)' - The system function allows the user to execute operating system - commands and then return to the `awk' program. The `system' - function executes the command given by the string value of - COMMAND. It returns, as its value, the status returned by the - command that was executed. This is known as returning the "exit - status". - - For example, if the following fragment of code is put in your - `awk' program: - - END { - system("mail -s 'awk run done' operator < /dev/null") - } - - the system operator will be sent mail when the `awk' program - finishes processing input and begins its end--of--input - processing. - - Note that much the same result can be obtained by redirecting - `print' or `printf' into a pipe. However, if your `awk' program - is interactive, this function is useful for cranking up large - self--contained programs, such as a shell or an editor. - - - -File: gawk-info, Node: User-defined, Next: Special, Prev: Built-in, Up: Top - -User--defined Functions -*********************** - -Complicated `awk' programs can often be simplified by defining your -own functions. User--defined functions can be called just like -built--in ones (*note Function Calls::.), but it is up to you to -define them--to tell `awk' what they should do. - -* Menu: - -* Definition Syntax:: How to write definitions and what they mean. -* Function Example:: An example function definition and what it does. -* Function Caveats:: Things to watch out for. 
-* Return Statement:: Specifying the value a function returns. - - - -File: gawk-info, Node: Definition Syntax, Next: Function Example, Up: User-defined - -Syntax of Function Definitions -============================== - -The definition of a function named NAME looks like this: - - function NAME (PARAMETER-LIST) { - BODY-OF-FUNCTION - } - -A valid function name is like a valid variable name: a sequence of -letters, digits and underscores, not starting with a digit. - -Such function definitions can appear anywhere between the rules of -the `awk' program. The general format of an `awk' program, then, is -now modified to include sequences of rules *and* user--defined -function definitions. - -The function definition need not precede all the uses of the function. -This is because `awk' reads the entire program before starting to -execute any of it. - -The PARAMETER-LIST is a list of the function's "local" variable -names, separated by commas. Within the body of the function, local -variables refer to arguments with which the function is called. If -the function is called with fewer arguments than it has local -variables, this is not an error; the extra local variables are simply -set as the null string. - -The local variable values hide or "shadow" any variables of the same -names used in the rest of the program. The shadowed variables are -not accessible in the function definition, because there is no way to -name them while their names have been taken away for the local -variables. All other variables used in the `awk' program can be -referenced or set normally in the function definition. - -The local variables last only as long as the function is executing. -Once the function finishes, the shadowed variables come back. - -The BODY-OF-FUNCTION part of the definition is the most important -part, because this is what says what the function should actually *do*. -The local variables exist to give the body a way to talk about the -arguments. 
- -Functions may be "recursive", i.e., they can call themselves, either -directly, or indirectly (via calling a second function that calls the -first again). - -The keyword `function' may also be written `func'. - - - -File: gawk-info, Node: Function Example, Next: Function Caveats, Prev: Definition Syntax, Up: User-defined - -Function Definition Example -=========================== - -Here is an example of a user--defined function, called `myprint', -that takes a number and prints it in a specific format. - - function myprint(num) - { - printf "%6.3g\n", num - } - -To illustrate, let's use the following `awk' rule to use, or "call", -our `myprint' function: - - $3 > 0 { myprint($3) } - -This program prints, in our special format, all the third fields that -contain a positive number in our input. Therefore, when given: - - 1.2 3.4 5.6 7.8 - 9.10 11.12 13.14 15.16 - 17.18 19.20 21.22 23.24 - -this program, using our function to format the results, will print: - - 5.6 - 13.1 - 21.2 - -Here is a rather contrived example of a recursive function. It -prints a string backwards: - - function rev (str, len) { - if (len == 0) { - printf "\n" - return - } - printf "%c", substr(str, len, 1) - rev(str, len - 1) - } - - - -File: gawk-info, Node: Function Caveats, Next: Return Statement, Prev: Function Example, Up: User-defined - -Caveats of Function Calling -=========================== - -*Note* that there cannot be any blanks between the function name and -the left parenthesis of the argument list, when calling a function. -This is so `awk' can tell you are not trying to concatenate the value -of a variable with the value of an expression inside the parentheses. - -When a function is called, it is given a *copy* of the values of its -arguments. This is called "passing by value". The caller may use a -variable as the expression for the argument, but the called function -does not know this: all it knows is what value the argument had.
For -example, if you write this code: - - foo = "bar" - z = myfunc(foo) - -then you should not think of the argument to `myfunc' as being ``the -variable `foo'''. Instead, think of the argument as the string -value, `"bar"'. - -If the function `myfunc' alters the values of its local variables, -this has no effect on any other variables. In particular, if -`myfunc' does this: - - function myfunc (win) { - print win - win = "zzz" - print win - } - -to change its first argument variable `win', this *does not* change -the value of `foo' in the caller. The role of `foo' in calling -`myfunc' ended when its value, `"bar"', was computed. If `win' also -exists outside of `myfunc', this definition will not change it--that -value is shadowed during the execution of `myfunc' and cannot be seen -or changed from there. - -However, when arrays are the parameters to functions, they are *not* -copied. Instead, the array itself is made available for direct -manipulation by the function. This is usually called "passing by -reference". Changes made to an array parameter inside the body of a -function *are* visible outside that function. *This can be very -dangerous if you don't watch what you are doing.* For example: - - function changeit (array, ind, nvalue) { - array[ind] = nvalue - } - - BEGIN { - a[1] = 1 ; a[2] = 2 ; a[3] = 3 - changeit(a, 2, "two") - printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3] - } - -will print `a[1] = 1, a[2] = two, a[3] = 3', because the call to -`changeit' stores `"two"' in the second element of `a'. - - - -File: gawk-info, Node: Return Statement, Prev: Function Caveats, Up: User-defined - -The `return' statement -====================== - -The body of a user--defined function can contain a `return' statement. -This statement returns control to the rest of the `awk' program. It -can also be used to return a value for use in the rest of the `awk' -program. It looks like: - - `return EXPRESSION' - -The EXPRESSION part is optional. 
If it is omitted, then the returned -value is undefined and, therefore, unpredictable. - -A `return' statement with no value expression is assumed at the end -of every function definition. So if control reaches the end of the -function definition, then the function returns an unpredictable value. - -Here is an example of a user--defined function that returns a value -for the largest number among the elements of an array: - - function maxelt (vec, i, ret) { - for (i in vec) { - if (ret == "" || vec[i] > ret) - ret = vec[i] - } - return ret - } - -You call `maxelt' with one argument, an array name. The local -variables `i' and `ret' are not intended to be arguments; while there -is nothing to stop you from passing two or three arguments to -`maxelt', the results would be strange. - -When writing a function definition, it is conventional to separate -the parameters from the local variables with extra spaces, as shown -above in the definition of `maxelt'. - -Here is a program that uses, or calls, our `maxelt' function. This -program loads an array, calls `maxelt', and then reports the maximum -number in that array: - - awk ' - function maxelt (vec, i, ret) { - for (i in vec) { - if (ret == "" || vec[i] > ret) - ret = vec[i] - } - return ret - } - - # Load all fields of each record into nums. - { - for(i = 1; i <= NF; i++) - nums[NR, i] = $i - } - - END { - print maxelt(nums) - }' - -Given the following input: - - 1 5 23 8 16 - 44 3 5 2 8 26 - 256 291 1396 2962 100 - -6 467 998 1101 - 99385 11 0 225 - -our program tells us (predictably) that: - - 99385 - -is the largest number in our array. - - - -File: gawk-info, Node: Special, Next: Sample Program, Prev: User-defined, Up: Top - -Special Variables -***************** - -Most `awk' variables are available for you to use for your own -purposes; they will never change except when your program assigns -them, and will never affect anything except when your program -examines them. - -A few variables have special meanings. 
Some of them `awk' examines -automatically, so that they enable you to tell `awk' how to do -certain things. Others are set automatically by `awk', so that they -carry information from the internal workings of `awk' to your program. - -Most of these variables are also documented in the chapters where -their areas of activity are described. - -* Menu: - -* User-modified:: Special variables that you change to control `awk'. - -* Auto-set:: Special variables where `awk' gives you information. - -
\ No newline at end of file diff --git a/gawk-info-5 b/gawk-info-5 deleted file mode 100644 index fd8d7eec..00000000 --- a/gawk-info-5 +++ /dev/null @@ -1,960 +0,0 @@ -Info file gawk-info, produced by Makeinfo, -*- Text -*- from input -file gawk.texinfo. - -This file documents `awk', a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - - -File: gawk-info, Node: User-modified, Next: Auto-set, Up: Special - -Special Variables That Control `awk' -==================================== - -This is a list of the variables which you can change to control how -`awk' does certain things. - -`FS' - `FS' is the input field separator (*note Field Separators::.). - The value is a regular expression that matches the separations - between fields in an input record. - - The default value is `" "', a string consisting of a single - space. As a special exception, this value actually means that - any sequence of spaces and tabs is a single separator. It also - causes spaces and tabs at the beginning or end of a line to be - ignored. 
- - You can set the value of `FS' on the command line using the `-F' - option: - - awk -F, 'PROGRAM' INPUT-FILES - -`OFMT' - This string is used by `awk' to control conversion of numbers to - strings (*note Conversion::.). It works by being passed, in - effect, as the first argument to the `sprintf' function. Its - default value is `"%.6g"'. - -`OFS' - This is the output field separator (*note Output Separators::.). - It is output between the fields output by a `print' statement. - Its default value is `" "', a string consisting of a single space. - -`ORS' - This is the output record separator (*note Output - Separators::.). It is output at the end of every `print' - statement. Its default value is the newline character, often - represented in `awk' programs as `\n'. - -`RS' - This is `awk''s record separator (*note Records::.). Its - default value is a string containing a single newline character, - which means that an input record consists of a single line of - text. - -`SUBSEP' - `SUBSEP' is a subscript separator (*note Multi-dimensional::.). - It has the default value of `"\034"', and is used to separate - the parts of the name of a multi--dimensional array. Thus, if - you access `foo[12,3]', it really accesses `foo["12\0343"]'. - - - -File: gawk-info, Node: Auto-set, Prev: User-modified, Up: Special - -Special Variables That Convey Information to You -================================================ - -This is a list of the variables that are set automatically by `awk' -on certain occasions so as to provide information for your program. - -`ARGC' -`ARGV' - The command--line arguments available to `awk' are stored in an - array called `ARGV'. `ARGC' is the number of command--line - arguments present. `ARGV' is indexed from zero to `ARGC' - 1. - For example: - - awk '{ print ARGV[$1] }' inventory-shipped BBS-list - - In this example, `ARGV[0]' contains `"awk"', `ARGV[1]' contains - `"inventory-shipped"', and `ARGV[2]' contains `"BBS-list"'. 
- `ARGC' is 3, one more than the index of the last element in - `ARGV' since the elements are numbered from zero. - - Notice that the `awk' program is not treated as an argument. - The `-f' `FILENAME' option, and the `-F' option, are also not - treated as arguments for this purpose. - - Variable assignments on the command line *are* treated as - arguments, and do show up in the `ARGV' array. - - Your program can alter `ARGC' and the elements of `ARGV'. Each time - `awk' reaches the end of an input file, it uses the next element - of `ARGV' as the name of the next input file. By storing a - different string there, your program can change which files are - read. You can use `-' to represent the standard input. By - storing additional elements and incrementing `ARGC' you can - cause additional files to be read. - - If you decrease the value of `ARGC', that eliminates input files - from the end of the list. By recording the old value of `ARGC' - elsewhere, your program can treat the eliminated arguments as - something other than file names. - - To eliminate a file from the middle of the list, store the null - string (`""') into `ARGV' in place of the file's name. As a - special feature, `awk' ignores file names that have been - replaced with the null string. - -`ENVIRON' - This is an array that contains the values of the environment. - The array indices are the environment variable names; the values - are the values of the particular environment variables. For - example, `ENVIRON["HOME"]' might be `/u/close'. Changing this - array does not affect the environment passed on to any programs - that `awk' may spawn via redirection or the `system' function. - (This may not work under operating systems other than MS-DOS, - Unix, or GNU.) - -`FILENAME' - This is the name of the file that `awk' is currently reading. - If `awk' is reading from the standard input (in other words, - there are no files listed on the command line), `FILENAME' is - set to `"-"'.
`FILENAME' is changed each time a new file is - read (*note Reading Files::.). - -`FNR' - `FNR' is the current record number in the current file. `FNR' - is incremented each time a new record is read (*note Getline::.). - It is reinitialized to 0 each time a new input file is started. - -`NF' - `NF' is the number of fields in the current input record. `NF' - is set each time a new record is read, when a new field is - created, or when $0 changes (*note Fields::.). - -`NR' - This is the number of input records `awk' has processed since - the beginning of the program's execution. (*note Records::.). - `NR' is set each time a new record is read. - -`RLENGTH' - `RLENGTH' is the length of the string matched by the `match' - function (*note String Functions::.). `RLENGTH' is set by - invoking the `match' function. Its value is the length of the - matched string, or -1 if no match was found. - -`RSTART' - `RSTART' is the start of the string matched by the `match' - function (*note String Functions::.). `RSTART' is set by - invoking the `match' function. Its value is the position of the - string where the matched string starts, or 0 if no match was - found. - - - -File: gawk-info, Node: Sample Program, Next: Notes, Prev: Special, Up: Top - -Sample Program -************** - -The following example is a complete `awk' program, which prints the -number of occurrences of each word in its input. It illustrates the -associative nature of `awk' arrays by using strings as subscripts. -It also demonstrates the `for X in ARRAY' construction. Finally, it -shows how `awk' can be used in conjunction with other utility -programs to do a useful task of some complexity with a minimum of -effort. Some explanations follow the program listing. - - awk ' - # Print list of word frequencies - { - for (i = 1; i <= NF; i++) - freq[$i]++ - } - - END { - for (word in freq) - printf "%s\t%d\n", word, freq[word] - }' - -The first thing to notice about this program is that it has two -rules. 
The first rule, because it has an empty pattern, is executed -on every line of the input. It uses `awk''s field--accessing -mechanism (*note Fields::.) to pick out the individual words from the -line, and the special variable `NF' (*note Special::.) to know how -many fields are available. - -For each input word, an element of the array `freq' is incremented to -reflect that the word has been seen an additional time. - -The second rule, because it has the pattern `END', is not executed -until the input has been exhausted. It prints out the contents of -the `freq' table that has been built up inside the first action. - -Note that this program has several problems that would prevent it -from being useful by itself on real text files: - - * Words are detected using the `awk' convention that fields are - separated by whitespace and that other characters in the input - (except newlines) don't have any special meaning to `awk'. This - means that punctuation characters count as part of words. - - * The `awk' language considers upper and lower case characters to - be distinct. Therefore, `foo' and `Foo' will not be treated by - this program as the same word. This is undesirable since in - normal text, words are capitalized if they begin sentences, and - a frequency analyzer should not be sensitive to that. - - * The output does not come out in any useful order. You're more - likely to be interested in which words occur most frequently, or - having an alphabetized table of how frequently each word occurs. - -The way to solve these problems is to use other operating system -utilities to process the input and output of the `awk' script. -Suppose the script shown above is saved in the file `frequency.awk'. -Then the shell command: - - tr A-Z a-z < file1 | tr -cd 'a-z\012' \ - | awk -f frequency.awk \ - | sort +1 -nr - -produces a table of the words appearing in `file1' in order of -decreasing frequency. 
- -The first `tr' command in this pipeline translates all the upper case -characters in `file1' to lower case. The second `tr' command deletes -all the characters in the input except lower case characters and -newlines. The second argument to the second `tr' is quoted to -protect the backslash in it from being interpreted by the shell. The -`awk' program reads this suitably massaged data and produces a word -frequency table, which is not ordered. - -The `awk' script's output is now sorted by the `sort' command and -printed on the terminal. The options given to `sort' in this example -specify to sort by the second field of each input line (skipping one -field), that the sort keys should be treated as numeric quantities -(otherwise `15' would come before `5'), and that the sorting should -be done in descending (reverse) order. - -See the general operating system documentation for more information -on how to use the `tr' and `sort' commands. - - - -File: gawk-info, Node: Notes, Next: Glossary, Prev: Sample Program, Up: Top - -Implementation Notes -******************** - -This appendix contains information mainly of interest to implementors -and maintainers of `gawk'. Everything in it applies specifically to -`gawk', and not to other implementations. - -* Menu: - -* Extensions:: Things `gawk' does that Unix `awk' does not. - -* Future Extensions:: Things likely to appear in a future release. - -* Improvements:: Suggestions for future improvements. - -* Manual Improvements:: Suggestions for improvements to this manual. - - - -File: gawk-info, Node: Extensions, Next: Future Extensions, Up: Notes - -GNU Extensions to the AWK Language -================================== - -Several new features are in a state of flux. They are described here -merely to document them somewhat, but they will probably change. We -hope they will be incorporated into other versions of `awk', too. 
- -All of these features can be turned off either by compiling `gawk' -with `-DSTRICT', or by invoking `gawk' as `awk'. - -The `AWKPATH' environment variable - When opening a file supplied via the `-f' option, if the - filename does not contain a `/', `gawk' will perform a "path - search" for the file, similar to that performed by the shell. - `gawk' gets its search path from the `AWKPATH' environment - variable. If that variable does not exist, it uses the default - path `".:/usr/lib/awk:/usr/local/lib/awk"'. - -Case Independent Matching - Two new operators have been introduced, `~~' and `!~~'. These - perform regular expression match and no-match operations that - are case independent. In other words, `A' and `a' would both - match `/a/'. - -The `-i' option - This option causes the `~' and `!~' operators to behave like the - `~~' and `!~~' operators described above. - -The `-v' option - This option prints version information for this particular copy - of `gawk'. This is so you can determine if your copy of `gawk' - is up to date with respect to whatever the Free Software - Foundation is currently distributing. It may disappear in a - future version of `gawk'. - - - -File: gawk-info, Node: Future Extensions, Next: Improvements, Prev: Extensions, Up: Notes - -Extensions Likely To Appear In A Future Release -=============================================== - -Here are some more extensions that indicate the directions we are -currently considering for `gawk'. Like the previous section, this -section is also subject to change. None of these are implemented yet. - -The `IGNORECASE' special variable - If `IGNORECASE' is non--zero, then *all* regular expression - matching will be done in a case--independent fashion. The `-i' - option and the `~~' and `!~~' operators will go away, as this - mechanism generalizes those facilities. - -More Escape Sequences - The ANSI C `\a' and `\x' escape sequences will be recognized. 
- Unix `awk' does not recognize `\v', although `gawk' does. - -`RS' as a regexp - The meaning of `RS' will be generalized along the lines of `FS'. - -Transliteration Functions - We are planning on adding `toupper' and `tolower' functions - which will take string arguments, and return strings where the - case of each letter has been transformed to upper-- or - lower--case respectively. - -Access To System File Descriptors - `gawk' will recognize the special file names `/dev/stdin', - `/dev/stdout', `/dev/stderr', and `/dev/fd/N' internally. These - will allow access to inherited file descriptors from within an - `awk' program. - - - -File: gawk-info, Node: Improvements, Next: Manual Improvements, Prev: Future Extensions, Up: Notes - -Suggestions for Future Improvements -=================================== - -Here are some projects that would--be `gawk' hackers might like to -take on. They vary in size from a few days to a few weeks of -programming, depending on which one you choose and how fast a -programmer you are. Please send any improvements you write to the -maintainers at the GNU project. - - 1. State machine regexp matcher: At present, `gawk' uses the - backtracking regular expression matcher from the GNU subroutine - library. If a regexp is really going to be used a lot of times, - it is faster to convert it once to a description of a finite - state machine, then run a routine simulating that machine every - time you want to match the regexp. You could use the matching - routines used by GNU `egrep'. - - 2. Compilation of `awk' programs: `gawk' uses a `Bison' - (YACC--like) parser to convert the script given it into a syntax - tree; the syntax tree is then executed by a simple recursive - evaluator. 
Both of these steps incur a lot of overhead, since - parsing can be slow (especially if you also do the previous - project and convert regular expressions to finite state machines - at compile time) and the recursive evaluator performs many - procedure calls to do even the simplest things. - - It should be possible for `gawk' to convert the script's parse - tree into a C program which the user would then compile, using - the normal C compiler and a special `gawk' library to provide - all the needed functions (regexps, fields, associative arrays, - type coercion, and so on). - - An easier possibility might be for an intermediate phase of - `awk' to convert the parse tree into a linear byte code form - like the one used in GNU Emacs Lisp. The recursive evaluator - would then be replaced by a straight line byte code interpreter - that would be intermediate in speed between running a compiled - program and doing what `gawk' does now. - - - -File: gawk-info, Node: Manual Improvements, Prev: Improvements, Up: Notes - -Suggestions For Future Improvements of This Manual -================================================== - - 1. An error message section has not been included in this version - of the manual. Perhaps some nice beta testers will document - some of the messages for the future. - - 2. A summary page has not been included, as the ``man'', or help, - page that comes with the `gawk' code should suffice. - - GNU only supports Info, so this manual itself should contain - whatever forms of information it would be useful to have on an - Info summary page. - - 3. A function and variable index has not been included as we are - not sure what to put in it. - - 4. A section summarizing the differences between V7 `awk' and - System V Release 4 `awk' would be useful for long--time `awk' - hackers. - - - -File: gawk-info, Node: Glossary, Next: Index, Prev: Notes, Up: Top - -Glossary -******** - -Action - A series of `awk' statements attached to a rule. 
If the rule's - pattern matches an input record, the `awk' language executes the - rule's action. Actions are always enclosed in curly braces. - -Amazing `awk' assembler - Henry Spencer at the University of Toronto wrote a retargetable - assembler completely as `awk' scripts. It is thousands of lines - long, including machine descriptions for several 8--bit - microcomputers. It is distributed with `gawk' and is a good - example of a program that would have been better written in - another language. - -Assignment - An `awk' expression that changes the value of some `awk' - variable or data object. An object that you can assign to is - called an "lvalue". - -Built-in function - The `awk' language provides built--in functions that perform - various numerical and string computations. Examples are `sqrt' - (for the square root of a number) and `substr' (for a substring - of a string). - -C - The system programming language that most of GNU is written in. - The `awk' programming language has C--like syntax, and this - manual points out similarities between `awk' and C when - appropriate. - -Compound statement - A series of `awk' statements, enclosed in curly braces. - Compound statements may be nested. - -Concatenation - Concatenating two strings means sticking them together, one - after another, giving a new string. For example, the string - `foo' concatenated with the string `bar' gives the string - `foobar'. - -Conditional expression - A relation that is either true or false, such as `(a < b)'. - Conditional expressions are used in `if' and `while' statements, - and in patterns to select which input records to process. - -Curly braces - The characters `{' and `}'. Curly braces are used in `awk' for - delimiting actions, compound statements, and function bodies. - -Data objects - These are numbers and strings of characters. Numbers are - converted into strings and vice versa, as needed. 
- -Escape Sequences - A special sequence of characters used for describing - non--printable characters, such as `\n' for newline, or `\033' - for the ASCII ESC (escape) character. - -Field - When `awk' reads an input record, it splits the record into - pieces separated by whitespace (or by a separator regexp which - you can change by setting the special variable `FS'). Such - pieces are called fields. - -Format - Format strings are used to control the appearance of output in - the `printf' statement. Also, data conversions from numbers to - strings are controlled by the format string contained in the - special variable `OFMT'. - -Function - A specialized group of statements often used to encapsulate - general or program--specific tasks. `awk' has a number of - built--in functions, and also allows you to define your own. - -`gawk' - The GNU implementation of `awk'. - -`awk' language - The language in which `awk' programs are written. - -`awk' program - An `awk' program consists of a series of "patterns" and - "actions", collectively known as "rules". For each input record - given to the program, the program's rules are all processed in - turn. `awk' programs may also contain function definitions. - -`awk' script - Another name for an `awk' program. - -Input record - A single chunk of data read in by `awk'. Usually, an `awk' - input record consists of one line of text. - -Keyword - In the `awk' language, a keyword is a word that has special - meaning. Keywords are reserved and may not be used as variable - names. - - The keywords are: `if', `else', `while', `do...while', `for', - `for...in', `break', `continue', `delete', `next', `function', - `func', and `exit'. - -Lvalue - An expression that can appear on the left side of an assignment - operator. In most languages, lvalues can be variables or array - elements. In `awk', a field designator can also be used as an - lvalue. - -Number - A numeric valued data object. 
The `gawk' implementation uses - double precision floating point to represent numbers. - -Pattern - Patterns tell `awk' which input records are interesting to which - rules. - - A pattern is an arbitrary conditional expression against which - input is tested. If the condition is satisfied, the pattern is - said to "match" the input record. A typical pattern might - compare the input record against a regular expression. - -Range (of input lines) - A sequence of consecutive lines from the input file. A pattern - can specify ranges of input lines for `awk' to process, or it - can specify single lines. - -Recursion - When a function calls itself, either directly or indirectly. If - this isn't clear, refer to the entry for ``recursion''. - -Redirection - Redirection means performing input from other than the standard - input stream, or output to other than the standard output stream. - - You can redirect the output of the `print' and `printf' - statements to a file or a system command, using the `>', `>>', - and `|' operators. You can redirect input to the `getline' - statement using the `<' and `|' operators. - -Regular Expression - See ``regexp''. - -Regexp - Short for "regular expression". A regexp is a pattern that - denotes a set of strings, possibly an infinite set. For - example, the regexp `R.*xp' matches any string starting with the - letter `R' and ending with the letters `xp'. In `awk', regexps - are used in patterns and in conditional expressions. - -Rule - A segment of an `awk' program, that specifies how to process - single input records. A rule consists of a "pattern" and an - "action". `awk' reads an input record; then, for each rule, if - the input record satisfies the rule's pattern, `awk' executes - the rule's action. Otherwise, the rule does nothing for that - input record. 
- -Special Variable - The variables `ARGC', `ARGV', `ENVIRON', `FILENAME', `FNR', - `FS', `NF', `NR', `OFMT', `OFS', `ORS', `RLENGTH', `RSTART', - `RS', `SUBSEP', have special meaning to `awk'. Changing some of - them affects `awk''s running environment. - -Stream Editor - A program that reads records from an input stream and processes - them one or more at a time. This is in contrast with batch - programs, which may expect to read their input files in entirety - before starting to do anything, and with interactive programs, - which require input from the user. - -String - A datum consisting of a sequence of characters, such as `I am a - string'. Constant strings are written with double--quotes in - the `awk' language, and may contain "escape sequences". - -Whitespace - A sequence of blank or tab characters occurring inside an input - record or a string. - - - -File: gawk-info, Node: Index, Prev: Glossary, Up: Top - -Index -***** - -* Menu: - -* #!: Executable Scripts. -* -f option: Long. -* `$NF', last field in record: Fields. -* `$' (field operator): Fields. -* `>>': Redirection. -* `>': Redirection. -* `BEGIN', special pattern: BEGIN/END. -* `END', special pattern: BEGIN/END. -* `awk' language: This Manual. -* `awk' program: This Manual. -* `break' statement: Break. -* `close' statement for input: Close Input. -* `close' statement for output: Close Output. -* `continue' statement: Continue. -* `delete' statement: Delete. -* `exit' statement: Exit. -* `for (x in ...)': Scanning an Array. -* `for' statement: For. -* `if' statement: If. -* `next' statement: Next. -* `print $0': Very Simple. -* `printf' statement, format of: Basic Printf. -* `printf', format-control characters: Format-Control. -* `printf', modifiers: Modifiers. -* `print' statement: Print. -* `return' statement: Return Statement. -* `while' statement: While. -* `|': Redirection. -* `BBS-list' file: The Files. -* `inventory-shipped' file: The Files. -* Accessing fields: Fields. -* Acronym: History. 
-* Action, curly braces: Actions. -* Action, curly braces: Getting Started. -* Action, default: Very Simple. -* Action, definition of: Getting Started. -* Action, general: Actions. -* Action, separating statements: Actions. -* Applications of `awk': When. -* Arguments in function call: Function Calls. -* Arguments, Command Line: Command Line. -* Arithmetic operators: Arithmetic Ops. -* Array assignment: Assigning Elements. -* Array reference: Reference to Elements. -* Arrays: Array Intro. -* Arrays, definition of: Array Intro. -* Arrays, deleting an element: Delete. -* Arrays, determining presence of elements: Reference to Elements. -* Arrays, multi-dimensional subscripts: Multi-dimensional. -* Arrays, special `for' statement: Scanning an Array. -* Assignment operators: Assignment Ops. -* Associative arrays: Array Intro. -* Backslash Continuation: Statements/Lines. -* Basic function of `gawk': Getting Started. -* Body of a loop: While. -* Boolean expressions: Boolean Ops. -* Boolean operators: Boolean Ops. -* Boolean patterns: Boolean. -* Built-in functions, list of: Built-in. -* Built-in variables: Variables. -* Calling a function: Function Calls. -* Case sensitivity and gawk: Read Terminal. -* Changing contents of a field: Changing Fields. -* Changing the record separator: Records. -* Closing files and pipes: Close Output. -* Command Line: Command Line. -* Command line formats: Running gawk. -* Command line, setting `FS' on: Field Separators. -* Comments: Comments. -* Comparison expressions: Comparison Ops. -* Comparison expressions as patterns: Comparison Patterns. -* Compound statements: Actions. -* Computed Regular Expressions: Regexp Usage. -* Concatenation: Concatenation. -* Conditional Patterns: Conditional Patterns. -* Conditional expression: Conditional Exp. -* Constants, types of: Constants. -* Continuing statements on the next line: Statements/Lines. -* Conversion of strings and numbers: Conversion. -* Curly braces: Actions. 
-* Curly braces: Getting Started. -* Default action: Very Simple. -* Default pattern: Very Simple. -* Deleting elements of arrays: Delete. -* Differences between `gawk' and `awk': Arithmetic Ops. -* Differences between `gawk' and `awk': Constants. -* Documenting `awk' programs: Comments. -* Dynamic Regular Expressions: Regexp Usage. -* Element assignment: Assigning Elements. -* Element of array: Reference to Elements. -* Emacs Lisp: When. -* Empty pattern: Empty. -* Escape sequence notation: Constants. -* Examining fields: Fields. -* Executable Scripts: Executable Scripts. -* Expression, conditional: Conditional Exp. -* Expressions: Actions. -* Expressions, boolean: Boolean Ops. -* Expressions, comparison: Comparison Ops. -* Field separator, `FS': Field Separators. -* Field separator, choice of: Field Separators. -* Field separator, setting on command line: Field Separators. -* Field, changing contents of: Changing Fields. -* Fields: Fields. -* Fields, negative-numbered: Non-Constant Fields. -* Fields, semantics of: Field Separators. -* Fields, separating: Field Separators. -* Format specifier: Format-Control. -* Format string: Basic Printf. -* Formatted output: Printf. -* Function call: Function Calls. -* Function definitions: Actions. -* Functions, user-defined: User-defined. -* General input: Reading Files. -* History of `awk': History. -* How gawk works: Two Rules. -* Increment operators: Increment Ops. -* Input file, sample: The Files. -* Input, `getline' function: Getline. -* Input, general: Reading Files. -* Input, multiple line records: Multiple. -* Input, standard: Read Terminal. -* Input, standard: Reading Files. -* Interaction of `awk' with other programs: I/O Functions. -* Invocation of `gawk': Command Line. -* Language, `awk': This Manual. -* Loop: While. -* Loops, breaking out of: Break. -* Lvalue: Assignment Ops. -* Manual, using this: This Manual. -* Metacharacters: Regexp Operators. -* Mod function, semantics of: Arithmetic Ops. 
-* Modifiers (in format specifiers): Modifiers. -* Multiple line records: Multiple. -* Multiple passes over data: Command Line. -* Multiple statements on one line: Statements/Lines. -* Negative-numbered fields: Non-Constant Fields. -* Number of fields, `NF': Fields. -* Number of records, `FNR': Records. -* Number of records, `NR': Records. -* Numerical constant: Constants. -* Numerical value: Constants. -* One-liners: One-liners. -* Operator, Ternary: Conditional Patterns. -* Operators, `$': Fields. -* Operators, arithmetic: Arithmetic Ops. -* Operators, assignment: Assignment Ops. -* Operators, boolean: Boolean Ops. -* Operators, increment: Increment Ops. -* Operators, regular expression matching: Regexp Usage. -* Operators, relational: Comparison Ops. -* Operators, relational: Comparison Patterns. -* Operators, string: Concatenation. -* Operators, string-matching: Regexp Usage. -* Options, Command Line: Command Line. -* Output: Printing. -* Output field separator, `OFS': Output Separators. -* Output record separator, `ORS': Output Separators. -* Output redirection: Redirection. -* Output, formatted: Printf. -* Output, piping: Redirection. -* Passes, Multiple: Command Line. -* Pattern, case sensitive: Read Terminal. -* Pattern, comparison expressions: Comparison Patterns. -* Pattern, default: Very Simple. -* Pattern, definition of: Getting Started. -* Pattern, empty: Empty. -* Pattern, regular expressions: Regexp. -* Patterns, `BEGIN': BEGIN/END. -* Patterns, `END': BEGIN/END. -* Patterns, Conditional: Conditional Patterns. -* Patterns, boolean: Boolean. -* Patterns, definition of: Patterns. -* Patterns, types of: Patterns. -* Pipes for output: Redirection. -* Printing, general: Printing. -* Program, `awk': This Manual. -* Program, Self contained: Executable Scripts. -* Program, definition of: Getting Started. -* Programs, documenting: Comments. -* Range pattern: Ranges. -* Reading files, `getline' function: Getline. -* Reading files, general: Reading Files. 
-* Reading files, multiple line records: Multiple. -* Record separator, `RS': Records. -* Records, multiple line: Multiple. -* Redirection of output: Redirection. -* Reference to array: Reference to Elements. -* Regexp: Regexp. -* Regular Expressions, Computed: Regexp Usage. -* Regular Expressions, Dynamic: Regexp Usage. -* Regular expression matching operators: Regexp Usage. -* Regular expression, metacharacters: Regexp Operators. -* Regular expressions as patterns: Regexp. -* Regular expressions, field separators and: Field Separators. -* Relational operators: Comparison Patterns. -* Relational operators: Comparison Ops. -* Removing elements of arrays: Delete. -* Rule, definition of: Getting Started. -* Running gawk programs: Running gawk. -* Sample input file: The Files. -* Scanning an array: Scanning an Array. -* Script, definition of: Getting Started. -* Scripts, Executable: Executable Scripts. -* Scripts, Shell: Executable Scripts. -* Self contained Programs: Executable Scripts. -* Separator character, choice of: Field Separators. -* Shell Scripts: Executable Scripts. -* Single quotes, why they are needed: One-shot. -* Special variables, user modifiable: User-modified. -* Standard input: Read Terminal. -* Standard input: Reading Files. -* Statements: Statements. -* Statements: Actions. -* String constants: Constants. -* String operators: Concatenation. -* String value: Constants. -* String-matching operators: Regexp Usage. -* Subscripts, multi-dimensional in arrays: Multi-dimensional. -* Ternary Operator: Conditional Patterns. -* Use of comments: Comments. -* User-defined functions: User-defined. -* User-defined variables: Variables. -* Uses of `awk': Preface. -* Using this manual: This Manual. -* Variables, built-in: Variables. -* Variables, user-defined: Variables. -* What is `awk': Preface. -* When to use `awk': When. -* file, `awk' program: Long. -* patterns, range: Ranges. -* program file: Long. -* regexp search operators: Regexp Usage. 
-* running long programs: Long. - - - -Tag Table: -Node: Top918 -Node: Preface2804 -Node: History4267 -Node: License5644 -Node: This Manual18989 -Node: The Files20330 -Node: Getting Started22914 -Node: Very Simple24249 -Node: Two Rules26030 -Node: More Complex28066 -Node: Running gawk30908 -Node: One-shot31827 -Node: Read Terminal32945 -Node: Long33862 -Node: Executable Scripts34991 -Node: Command Line36534 -Node: Comments40168 -Node: Statements/Lines41067 -Node: When43498 -Node: Reading Files45420 -Node: Records47119 -Node: Fields49902 -Node: Non-Constant Fields52789 -Node: Changing Fields54591 -Node: Field Separators57302 -Node: Multiple62004 -Node: Assignment Options64393 -Node: Getline65608 -Node: Close Input74958 -Node: Printing76023 -Node: Print76748 -Node: Print Examples78712 -Node: Output Separators80751 -Node: Redirection82417 -Node: Close Output85886 -Node: Printf88132 -Node: Basic Printf88908 -Node: Format-Control90261 -Node: Modifiers91806 -Node: Printf Examples93108 -Node: One-liners95707 -Node: Patterns97642 -Node: Empty100130 -Node: Regexp100402 -Node: Regexp Usage101173 -Node: Regexp Operators102947 -Node: Comparison Patterns107890 -Node: Ranges109336 -Node: BEGIN/END110722 -Node: Boolean113151 -Node: Conditional Patterns115605 -Node: Actions116105 -Node: Expressions117435 -Node: Constants119124 -Node: Variables121097 -Node: Arithmetic Ops122454 -Node: Concatenation123840 -Node: Comparison Ops124569 -Node: Boolean Ops125973 -Node: Assignment Ops128266 -Node: Increment Ops131817 -Node: Conversion134112 -Node: Conditional Exp136066 -Node: Function Calls137384 -Node: Statements139939 -Node: If141253 -Node: While142627 -Node: Do144232 -Node: For145265 -Node: Break148306 -Node: Continue149848 -Node: Next151476 -Node: Exit152985 -Node: Arrays154514 -Node: Array Intro155624 -Node: Reference to Elements159227 -Node: Assigning Elements161115 -Node: Array Example161615 -Node: Scanning an Array163336 -Node: Delete165642 -Node: Multi-dimensional166529 -Node: 
Multi-scanning169746 -Node: Built-in171303 -Node: Numeric Functions172806 -Node: String Functions176601 -Node: I/O Functions183717 -Node: User-defined185189 -Node: Definition Syntax185834 -Node: Function Example187928 -Node: Function Caveats189034 -Node: Return Statement191386 -Node: Special193612 -Node: User-modified194478 -Node: Auto-set196511 -Node: Sample Program200558 -Node: Notes204316 -Node: Extensions204909 -Node: Future Extensions206490 -Node: Improvements207922 -Node: Manual Improvements210034 -Node: Glossary210928 -Node: Index217934 diff --git a/gawk.1 b/gawk.1 deleted file mode 100644 index 3d2068b8..00000000 --- a/gawk.1 +++ /dev/null @@ -1,1344 +0,0 @@ -.TH GAWK 1 "Free Software Foundation" -.SH NAME -gawk \- pattern scanning and processing language -.SH SYNOPSIS -.B gawk -.ig -[ -.B \-d -] [ -.B \-D -] [ -.B \-v -] [ -.B \-V -] -.. -[ -.BI \-F\^ fs -] -.B \-f -.I program-file -[ -.B \-f -.I program-file -\&.\^.\^. ] [ -.B \-\^\- -] file .\^.\^. -.br -.B gawk -.ig -[ -.B \-d -] [ -.B \-D -] [ -.B \-v -] [ -.B \-V -] -.. -[ -.BI \-F\^ fs -] [ -.B \-\^\- -] -.I program-text -file .\^.\^. -.SH DESCRIPTION -.I Gawk -is the GNU Project's implementation of the AWK programming language. -It conforms to the definition and description of the language in -.IR "The AWK Programming Language" , -by Aho, Kernighan, and Weinberger, -with the additional features defined in the System V Release 4 version -of \s-1UNIX\s+1 -.IR awk . -.PP -The command line consists of options to -.I gawk -itself, the AWK program text (if not supplied via the -.B \-f -option), and values to be made -available in the -.B ARGC -and -.B ARGV -pre-defined AWK variables. -.PP -The options that -.I gawk -accepts are: -.TP -.BI \-F fs -Use -.I fs -for the input field separator (the value of the -.B FS -predefined -variable). -.TP -.BI \-f " program-file" -Read the AWK program source from the file -.IR program-file , -instead of from the first command line argument. 
-.TP -.B \-\^\- -Signal the end of options. This is useful to allow further arguments to the -AWK program itself to start with a ``\-''. -This is mainly for consistency with the argument parsing convention used -by most other System V programs. -.PP -Any other options are flagged as illegal, but are otherwise ignored. -(However, see the -.B "GNU EXTENSIONS" -section, below.) -.PP -An AWK program consists of a sequence of pattern-action statements -and optional function definitions. -.RS -.PP -\fIpattern\fB { \fIaction statements\fB }\fR -.br -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR -.RE -.PP -.I Gawk -first reads the program source from the -.IR program-file (s) -if specified, or from the first non-option argument on the command line. -The -.B \-f -option may be used multiple times on the command line. -.I Gawk -will read the program text as if all the -.IR program-file s -had been concatenated together. This is useful for building libraries -of AWK functions, without having to include them in each new AWK -program that uses them. To use a library function in a file from a -program typed in on the command line, specify -.B /dev/tty -as one of the -.IR program-file s, -type your program, and end it with a -.B ^D -(control-d). -.PP -The environment variable -.B AWKPATH -specifies a search path to use when finding source files named with -the -.B \-f -option. If this variable does not exist, the default path is -\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR. -If a file name given to the -.B \-f -option contains a ``/'' character, no path search is performed. -.PP -.I Gawk -compiles the program into an internal form, -and then proceeds to read -each file named in the -.B ARGV -array. -If there are no files named on the command line, -.I gawk -reads the standard input. -.PP -If a ``file'' named on the command line has the form -.IB var = val -it is treated as a variable assignment. The variable -.I var -will be assigned the value -.IR val . 
-This is most useful for dynamically assigning values to the variables -AWK uses to control how input is broken into fields and records. It -is also useful for controlling state if multiple passes are needed over -a single data file. -.PP -For each line in the input, -.I gawk -tests to see if it matches any -.I pattern -in the AWK program. -For each pattern that the line matches, the associated -.I action -is executed. -.SH VARIABLES AND FIELDS -AWK variables are dynamic; they come into existence when they are -first used. Their values are either floating-point numbers or strings, -depending upon how they are used. AWK also has single dimension -arrays; multiply dimensioned arrays may be simulated. -There are several pre-defined variables that AWK sets as a program -runs; these will be described as needed and summarized below. -.PP -As each input line is read, -.I gawk -splits the line into -.IR fields , -using the value of the -.B FS -variable as the field separator. -If -.B FS -is a single character, fields are separated by that character. -Otherwise, -.B FS -is expected to be a full regular expression. -In the special case that -.B FS -is a single blank, fields are separated -by runs of blanks and/or tabs. -Note that the value of -.B IGNORECASE -(see below) will also affect how fields are split when -.B FS -is a regular expression. -.PP -Each field in the input line may be referenced by its position, -.BR $1 , -.BR $2 , -and so on. -.B $0 -is the whole line. The value of a field may be assigned to as well. -Fields need not be referenced by constants: -.RS -.PP -.ft B -n = 5 -.br -print $n -.ft R -.RE -.PP -prints the fifth field in the input line. -The variable -.B NF -is set to the total number of fields in the input line. -.PP -References to non-existent fields (i.e. fields after -.BR $NF ), -produce the null-string. 
However, assigning to a non-existent field -(e.g., -.BR "$(NF+2) = 5" ) -will increase the value of -.BR NF , -create any intervening fields with the null string as their value, and -cause the value of -.B $0 -to be recomputed, with the fields being separated by the value of -.BR OFS . -.SS Built-in Variables -.PP -AWK's built-in variables are: -.PP -.RS -.TP \l'\fBIGNORECASE\fR' -.B ARGC -the number of command line arguments (does not include options to -.IR gawk , -or the program source). -.TP \l'\fBIGNORECASE\fR' -.B ARGV -array of command line arguments. The array is indexed from -0 to -.B ARGC -\- 1. -Dynamically changing the contents of -.B ARGV -can control the files used for data. -.TP \l'\fBIGNORECASE\fR' -.B ENVIRON -An array containing the values of the current environment. -The array is indexed by the environment variables, each element being -the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be -.BR /u/arnold ). -Changing this array does not affect the environment seen by programs which -.I gawk -spawns via redirection or the -.B system -function. -.TP \l'\fBIGNORECASE\fR' -.B FILENAME -the name of the current input file. -If no files are specified on the command line, the value of -.B FILENAME -is ``\-''. -.TP \l'\fBIGNORECASE\fR' -.B FNR -the input record number in the current input file. -.TP \l'\fBIGNORECASE\fR' -.B FS -the input field separator, a blank by default. -.TP \l'\fBIGNORECASE\fR' -.B IGNORECASE -Controls the case-sensitivity of all regular expression operations. If -.B IGNORECASE -has a non-zero value, then pattern matching in rules, -field splitting with -.BR FS , -regular expression -matching with -.B ~ -and -.BR !~ , -and the -.BR gsub() , -.BR match() , -.BR split() , -and -.B sub() -pre-defined functions will all ignore case when doing regular expression -operations. Thus, if -.B IGNORECASE -is not equal to zero, -.B /aB/ -matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, -and \fB"AB"\fP. 
-As with all AWK variables, the initial value of -.B IGNORECASE -is zero, so all regular expression operations are normally case-sensitive. -.TP \l'\fBIGNORECASE\fR' -.B NF -the number of fields in the current input record. -.TP \l'\fBIGNORECASE\fR' -.B NR -the total number of input records seen so far. -.TP \l'\fBIGNORECASE\fR' -.B OFMT -the output format for numbers, -.B %.6g -by default. -.TP \l'\fBIGNORECASE\fR' -.B OFS -the output field separator, a blank by default. -.TP \l'\fBIGNORECASE\fR' -.B ORS -the output record separator, by default a newline. -.TP \l'\fBIGNORECASE\fR' -.B RS -the input record separator, by default a newline. -.B RS -is exceptional in that only the first character of its string -value is used for separating records. If -.B RS -is set to the null string, then records are separated by -blank lines. -When -.B RS -is set to the null string, then the newline character always acts as -a field separator, in addition to whatever value -.B FS -may have. -.TP \l'\fBIGNORECASE\fR' -.B RSTART -the index of the first character matched by -.BR match() ; -0 if no match. -.TP \l'\fBIGNORECASE\fR' -.B RLENGTH -the length of the string matched by -.BR match() ; -\-1 if no match. -.TP \l'\fBIGNORECASE\fR' -.B SUBSEP -the character used to separate multiple subscripts in array -elements, by default \fB"\e034"\fR. -.RE -.SS Arrays -.PP -Arrays are subscripted with an expression between square brackets -.RB ( [ " and " ] ). -If the expression is an expression list -.RI ( expr ", " expr " ...)" -then the array subscript is a string consisting of the -concatenation of the (string) value of each expression, -separated by the value of the -.B SUBSEP -variable. -This facility is used to simulate multiply dimensioned -arrays. 
For example: -.PP -.RS -.ft B -i = "A" ;\^ j = "B" ;\^ k = "C" -.br -x[i,j,k] = "hello, world\en" -.ft R -.RE -.PP -assigns the string \fB"hello, world\en"\fR to the element of the array -.B x -which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK -are associative, i.e. indexed by string values. -.PP -The special operator -.B in -may be used in an -.B if -or -.B while -statement to see if an array has an index consisting of a particular -value. -.PP -.RS -.ft B -.nf -if (val in array) - print array[val] -.fi -.ft -.RE -.PP -If the array has multiple subscripts, use -.BR "(i, j) in array" . -.PP -The -.B in -construct may also be used in a -.B for -loop to iterate over all the elements of an array. -.PP -An element may be deleted from an array using the -.B delete -statement. -.SS Variable Typing -.PP -Variables and fields -may be (floating point) numbers, or strings, or both. How the -value of a variable is interpreted depends upon its context. If used in -a numeric expression, it will be treated as a number, if used as a string -it will be treated as a string. -.PP -To force a variable to be treated as a number, add 0 to it; to force it -to be treated as a string, concatenate it with the null string. -.PP -The AWK language defines comparisons as being done numerically if -possible, otherwise one or both operands are converted to strings and -a string comparison is performed. -.PP -Uninitialized variables have the numeric value 0 and the string value "" -(the null, or empty, string). -.SH PATTERNS AND ACTIONS -AWK is a line oriented language. The pattern comes first, and then the -action. Action statements are enclosed in -.B { -and -.BR } . -Either the pattern may be missing, or the action may be missing, but, -of course, not both. If the pattern is missing, the action will be -executed for every single line of input. -A missing action is equivalent to -.RS -.PP -.B "{ print }" -.RE -.PP -which prints the entire line. 
-.PP -Comments begin with the ``#'' character, and continue until the -end of the line. -Blank lines may be used to separate statements. -Normally, a statement ends with a newline; however, this is not the -case for lines ending in -a ``,'', ``{'', ``?'', ``:'', ``&&'', or ``||''. -Lines ending in -.B do -or -.B else -also have their statements automatically continued on the following line. -In other cases, a line can be continued by ending it with a ``\e'', -in which case the newline will be ignored. -.PP -Multiple statements may -be put on one line by separating them with a ``;''. -This applies to both the statements within the action part of a -pattern-action pair (the usual case), -and to the pattern-action statements themselves. -.SS Patterns -AWK patterns may be one of the following: -.PP -.RS -.nf -.B BEGIN -.B END -.BI / "regular expression" / -.I "relational expression" -.IB pattern " && " pattern -.IB pattern " || " pattern -.IB pattern " ? " pattern " : " pattern -.BI ( pattern ) -.BI ! " pattern" -.IB pattern1 ", " pattern2 -.fi -.RE -.PP -.B BEGIN -and -.B END -are two special kinds of patterns which are not tested against -the input. -The action parts of all -.B BEGIN -patterns are merged as if all the statements had -been written in a single -.B BEGIN -block. They are executed before any -of the input is read. Similarly, all the -.B END -blocks are merged, -and executed when all the input is exhausted (or when an -.B exit -statement is executed). -.B BEGIN -and -.B END -patterns cannot be combined with other patterns in pattern expressions. -.B BEGIN -and -.B END -patterns cannot have missing action parts. -.PP -For -.BI / "regular expression" / -patterns, the associated statement is executed for each input line that matches -the regular expression. -Regular expressions are the same as those in -.IR egrep (1), -and are summarized below. -.PP -A -.I "relational expression" -may use any of the operators defined below in the section on actions. 
-These generally test whether certain fields match certain regular expressions. -.PP -The -.BR && , -.BR || , -and -.B ! -operators are logical AND, logical OR, and logical NOT, respectively, as in C. -They do short-circuit evaluation, also as in C, and are used for combining -more primitive pattern expressions. As in most languages, parentheses -may be used to change the order of evaluation. -.PP -The -.B ?\^: -operator is like the same operator in C. If the first pattern is true -then the pattern used for testing is the second pattern, otherwise it is -the third. Only one of the second and third patterns is evaluated. -.PP -The -.IB pattern1 ", " pattern2 -form of an expression is called a range pattern. -It matches all input lines starting with a line that matches -.IR pattern1 , -and continuing until a line that matches -.IR pattern2 , -inclusive. It does not combine with any other sort of pattern expression. -.SS Regular Expressions -Regular expressions are the extended kind found in -.IR egrep . -They are composed of characters as follows: -.RS -.TP \l'[^abc...]' -.I c -matches the non-metacharacter -.IR c . -.TP \l'[^abc...]' -.I \ec -matches the literal character -.IR c . -.TP \l'[^abc...]' -.B . -matches any character except newline. -.TP \l'[^abc...]' -.B ^ -matches the beginning of a line or a string. -.TP \l'[^abc...]' -.B $ -matches the end of a line or a string. -.TP \l'[^abc...]' -.BI [ abc... ] -character class, matches any of the characters -.IR abc... . -.TP \l'[^abc...]' -.BI [^ abc... ] -negated character class, matches any character except -.I abc... -and newline. -.TP \l'[^abc...]' -.IB r1 | r2 -alternation: matches either -.I r1 -or -.IR r2 . -.TP \l'[^abc...]' -.I r1r2 -concatenation: matches -.IR r1 , -and then -.IR r2 . -.TP \l'[^abc...]' -.IB r + -matches one or more -.IR r 's. -.TP \l'[^abc...]' -.IB r * -matches zero or more -.IR r 's. -.TP \l'[^abc...]' -.IB r ? -matches zero or one -.IR r 's. 
-.TP \l'[^abc...]' -.BI ( r ) -grouping: matches -.IR r . -.RE -.SS Actions -Action statements are enclosed in braces, -.B { -and -.BR } . -Action statements consist of the usual assignment, conditional, and looping -statements found in most languages. The operators, control statements, -and input/output statements -available are patterned after those in C. -.PP -The operators in AWK, in order of increasing precedence, are -.PP -.RS -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "= += \-= *= /= %= ^=" -Assignment. Both absolute assignment -.BI ( var " = " value ) -and operator-assignment (the other forms) are supported. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B ?: -The C conditional expression. This has the form -.IB expr1 " ? " expr2 " : " expr3\c -\&. If -.I expr1 -is true, the value of the expression is -.IR expr2 , -otherwise it is -.IR expr3 . -Only one of -.I expr2 -and -.I expr3 -is evaluated. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B || -logical OR. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B && -logical AND. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "~ !~" -regular expression match, negated match. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "< <= > >= != ==" -the regular relational operators. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.I blank -string concatenation. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "+ \-" -addition and subtraction. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "* / %" -multiplication, division, and modulus. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "+ \- !" -unary plus, unary minus, and logical negation. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B ^ -exponentiation (\fB**\fR may also be used, and \fB**=\fR for -the assignment operator). -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B "++ \-\^\-" -increment and decrement, both prefix and postfix. -.TP \l'\fB= += \-= *= /= %= ^=\fR' -.B $ -field reference. 
-.RE -.PP -The control statements are -as follows: -.PP -.RS -.nf -\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR] -\fBwhile (\fIcondition\fB) \fIstatement \fR -\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR -\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR -\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR -\fBbreak\fR -\fBcontinue\fR -\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR -\fBexit\fR [ \fIexpression\fR ] -\fB{ \fIstatements \fB} -.fi -.RE -.PP -The input/output statements are as follows: -.PP -.RS -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI close( filename ) -close file (or pipe, see below). -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.B getline -set -.B $0 -from next input record; set -.BR NF , -.BR NR , -.BR FNR . -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI "getline <" file -set -.B $0 -from next record of -.IR file ; -set -.BR NF . -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI getline " var" -set -.I var -from next input record; set -.BR NF , -.BR FNR . -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI getline " var" " <" file -set -.I var -from next record of -.IR file . -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.B next -Stop processing the current input record. The next input record -is read and processing starts over with the first pattern in the -AWK program. If the end of the input data is reached, the -.B END -block(s), if any, are executed. -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.B print -prints the current record. -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI print " expr-list" -prints expressions. -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI print " expr-list" " >" file -prints expressions on -.IR file . -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI printf " fmt, expr-list" -format and print. -.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI printf " fmt, expr-list" " >" file -format and print on -.IR file . 
-.TP \l'\fBprintf \fIfmt, expr-list\fR' -.BI system( cmd-line ) -execute the command -.IR cmd-line , -and return the exit status. -(This may not be available on -systems besides \s-1UNIX\s+1 and \s-1GNU\s+1.) -.RE -.PP -Other input/output redirections are also allowed. For -.B print -and -.BR printf , -.BI >> file -appends output to the -.IR file , -while -.BI | " command" -writes on a pipe. -In a similar fashion, -.IB command " | getline" -pipes into -.BR getline . -.BR Getline -will return 0 on end of file, and \-1 on an error. -.PP -The AWK versions of the -.B printf -and -.B sprintf -(see below) -functions accept the following conversion specification formats: -.RS -.TP -.B %c -An ASCII character. -If the argument used for -.B %c -is numeric, it is treated as a character and printed. -Otherwise, the argument is assumed to be a string, and only the first -character of that string is printed. -.TP -.B %d -A decimal number (the integer part). -.TP -.B %e -A floating point number of the form -.BR [\-]d.ddddddE[+\^\-]dd . -.TP -.B %f -A floating point number of the form -.BR [\-]ddd.dddddd . -.TP -.B %g -Use -.B e -or -.B f -conversion, whichever is shorter, with nonsignificant zeros suppressed. -.TP -.B %o -An unsigned octal number (again, an integer). -.TP -.B %s -A character string. -.TP -.B %x -An unsigned hexadecimal number (an integer). -.TP -.B %% -A single -.B % -character; no argument is converted. -.RE -.PP -There are optional, additional parameters that may lie between the -.B % -and the control letter: -.RS -.TP -.B \- -The expression should be left-justified within its field. -.TP -.I width -The field should be padded to this width. If the number has a leading -zero, then the field will be padded with zeros. -Otherwise it is padded with blanks. -.TP -.BI . prec -A number indicating the maximum width of strings or digits to the right -of the decimal point. 
-.RE -.PP -The dynamic -.I width -and -.I prec -capabilities of the C library -.B printf -routines are not supported. -However, they may be simulated by using -the AWK concatenation operation to build up -a format specification dynamically. -.PP -When doing I/O redirection from either -.B print -or -.B printf -into a file, -or via -.B getline -from a file, -.I gawk -recognizes certain special filenames internally. These filenames -allow access to open file descriptors inherited from -.IR gawk 's -parent process (usually the shell). The filenames are: -.RS -.TP -.B /dev/stdin -The standard input. -.TP -.B /dev/stdout -The standard output. -.TP -.B /dev/stderr -The standard error output. -.TP -.BI /dev/fd/\^ n -The file denoted by the open file descriptor -.IR n . -.RE -.PP -These are particularly useful for error messages. For example: -.PP -.RS -.ft B -print "You blew it!" > "/dev/stderr" -.ft R -.RE -.PP -whereas you would otherwise have to use -.PP -.RS -.ft B -print "You blew it!" | "cat 1>&2" -.ft R -.RE -.PP -These file names may also be used on the command line to name data files. -.PP -AWK has the following pre-defined arithmetic functions: -.PP -.RS -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI atan2( y , " x" ) -returns the arctangent of -.I y/x -in radians. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI cos( expr ) -returns the cosine of \fIexpr\fR, which is in radians. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI exp( expr ) -the exponential function. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI int( expr ) -truncates to integer. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI log( expr ) -the natural logarithm function. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.B rand() -returns a random number between 0 and 1. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI sin( expr ) -returns the sine of \fIexpr\fR, which is in radians. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI sqrt( expr ) -the square root function. -.TP \l'\fBsrand(\fIexpr\fB)\fR' -.BI srand( expr ) -use -.I expr -as a new seed for the random number generator. 
If no -.I expr -is provided, the time of day will be used. -The return value is the previous seed for the random -number generator. -.RE -.PP -AWK has the following pre-defined string functions: -.PP -.RS -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR -for each substring matching the regular expression -.I r -in the string -.IR t , -substitute the string -.IR s , -and return the number of substitutions. -If -.I t -is not supplied, use -.BR $0 . -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI index( s , " t" ) -returns the index of the string -.I t -in the string -.IR s , -or 0 if -.I t -is not present. -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI length( s ) -returns the length of the string -.IR s . -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI match( s , " r" ) -returns the position in -.I s -where the regular expression -.I r -occurs, or 0 if -.I r -is not present, and sets the values of -.B RSTART -and -.BR RLENGTH . -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR -splits the string -.I s -into the array -.I a -on the regular expression -.IR r , -and returns the number of fields. If -.I r -is omitted, -.B FS -is used instead. -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI sprintf( fmt , " expr-list" ) -prints -.I expr-list -according to -.IR fmt , -and returns the resulting string. -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR -this is just like -.BR gsub , -but only the first matching substring is replaced. -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR -returns the -.IR n -character -substring of -.I s -starting at -.IR i . -If -.I n -is omitted, the rest of -.I s -is used. 
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI tolower( str ) -returns a copy of the string -.IR str , -with all the upper-case characters in -.I str -translated to their corresponding lower-case counterparts. -Non-alphabetic characters are left unchanged. -.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' -.BI toupper( str ) -returns a copy of the string -.IR str , -with all the lower-case characters in -.I str -translated to their corresponding upper-case counterparts. -Non-alphabetic characters are left unchanged. -.RE -.PP -String constants in AWK are sequences of characters enclosed -between double quotes (\fB"\fR). Within strings, certain -.I "escape sequences" -are recognized, as in C. These are: -.PP -.RS -.TP \l'\fB\e\fIddd\fR' -.B \e\e -A literal backslash. -.TP \l'\fB\e\fIddd\fR' -.B \ea -The ``alert'' character; usually the ASCII BEL character. -.TP \l'\fB\e\fIddd\fR' -.B \eb -backspace. -.TP \l'\fB\e\fIddd\fR' -.B \ef -form-feed. -.TP \l'\fB\e\fIddd\fR' -.B \en -new line. -.TP \l'\fB\e\fIddd\fR' -.B \er -carriage return. -.TP \l'\fB\e\fIddd\fR' -.B \et -horizontal tab. -.TP \l'\fB\e\fIddd\fR' -.B \ev -vertical tab. -.TP \l'\fB\e\fIddd\fR' -.BI \ex "\^hex digits" -The character represented by the string of hexadecimal digits following -the -.BR \ex . -As in ANSI C, all following hexadecimal digits are considered part of -the escape sequence. -(This feature should tell us something about language design by committee.) -E.g., "\ex1B" is the ASCII ESC (escape) character. -.TP \l'\fB\e\fIddd\fR' -.BI \e ddd -The character represented by the 1-, 2-, or 3-digit sequence of octal -digits. E.g. "\e033" is the ASCII ESC (escape) character. -.TP \l'\fB\e\fIddd\fR' -.BI \e c -The literal character -.IR c\^ . -.RE -.PP -The escape sequences may also be used inside constant regular expressions -(e.g., -.B "/[\ \et\ef\en\er\ev]/" -matches whitespace characters). 
-.SH FUNCTIONS -Functions in AWK are defined as follows: -.PP -.RS -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR -.RE -.PP -Functions are executed when called from within the action parts of regular -pattern-action statements. Actual parameters supplied in the function -call are used to instantiate the formal parameters declared in the function. -Arrays are passed by reference, other variables are passed by value. -.PP -Since functions were not originally part of the AWK language, the provision -for local variables is rather clumsy: they are declared as extra parameters -in the parameter list. The convention is to separate local variables from -real parameters by extra spaces in the parameter list. For example: -.PP -.RS -.ft B -.nf -function f(p, q, a, b) { # a & b are local - ..... } - -/abc/ { ... ; f(1, 2) ; ... } -.fi -.ft R -.RE -.PP -The left parenthesis in a function call is required -to immediately follow the function name, -without any intervening white space. -This is to avoid a syntactic ambiguity with the concatenation operator. -This restriction does not apply to the built-in functions listed above. -.PP -Functions may call each other and may be recursive. -Function parameters used as local variables are initialized -to the null string and the number zero upon function invocation. -.PP -The word -.B func -may be used in place of -.BR function . -.SH EXAMPLES -.nf -Print and sort the login names of all users: - -.ft B - BEGIN { FS = ":" } - { print $1 | "sort" } - -.ft R -Count lines in a file: - -.ft B - { nlines++ } - END { print nlines } - -.ft R -Precede each line by its number in the file: - -.ft B - { print FNR, $0 } - -.ft R -Concatenate and line number (a variation on a theme): - -.ft B - { print NR, $0 } -.ft R -.SH SEE ALSO -.IR "The AWK Programming Language" , -Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger, -Addison-Wesley, 1988. ISBN 0-201-07981-X. 
-.SH SYSTEM V RELEASE 4 COMPATIBILITY -A primary goal for -.I gawk -is compatibility with the latest version of \s-1UNIX\s+1 -.IR awk . -To this end, -.I gawk -incorporates the following user visible -features which are not described in the AWK book, -but are part of -.I awk -in System V Release 4. -.PP -When processing arguments, -.I gawk -uses the special option ``\fB\-\^\-\fP'' to signal the end of -arguments, and warns about, but otherwise ignores, undefined options. -.PP -The AWK book does not define the return value of -.BR srand() . -The System V Release 4 version of \s-1UNIX\s+1 -.I awk -has it return the seed it was using, to allow keeping track -of random number sequences. Therefore -.B srand() -in -.I gawk -also returns its current seed. -.PP -The use of multiple -.B \-f -options is a new feature, as is the -.B ENVIRON -array. -.SH GNU EXTENSIONS -.I Gawk -has some extensions to System V -.IR awk . -They are described in this section. All the extensions described here -can be disabled by compiling -.I gawk -with -.BR \-DSTRICT , -or by invoking -.I gawk -with the name -.IR awk . -If the underlying operating system supports the -.B /dev/fd -directory and corresponding files, then -.I gawk -can be compiled with -.B \-DNO_DEV_FD -to disable the special filename processing. -.PP -The following features of -.I gawk -are not available in -System V -.IR awk . -.RS -.TP \l'\(bu' -\(bu -The -.BR \ea , -.BR \ev , -or -.B \ex -escape sequences are not recognized. -.TP \l'\(bu' -\(bu -The special file names available for I/O redirection are not recognized. -.TP \l'\(bu' -\(bu -The -.B tolower -and -.B toupper -built-in string functions are not available. -.TP \l'\(bu' -\(bu -The -.B IGNORECASE -variable and its side-effects are not available. -.TP \l'\(bu' -\(bu -No path search is performed for files named via the -.B \-f -option. Therefore the -.B AWKPATH -environment variable is not special. 
-.RE -.PP -The AWK book does not define the return value of the -.B close -function. -.IR Gawk\^ 's -.B close -returns the value from -.IR fclose (3), -or -.IR pclose (3), -when closing a file or pipe, respectively. -.PP -When -.I gawk -is invoked as -.IR awk , -if the -.I fs -argument to the -.B \-F -option is ``t'', then -.B FS -will be set to the tab character. -Since this is a rather ugly special case, it is not the default behavior. -.PP -The rest of the features described in this section may change at some time in -the future, or may go away entirely. -You should not write programs that depend upon them. -.PP -.I Gawk -accepts the following additional options: -.ig -.TP -.B \-D -Turn on general debugging and turn on -.IR yacc (1) -or -.IR bison (1) -debugging output during program parsing. -This option should only be of interest to the -.I gawk -maintainers, and may not even be compiled into -.IR gawk . -.TP -.B \-d -Turn on general debugging and print the -.I gawk -internal tree as the program is executed. -This option should only be of interest to the -.I gawk -maintainers, and may not even be compiled into -.IR gawk . -.. -.TP -.B \-v -Print version information for this particular copy of -.I gawk -on the error output. -This is useful mainly for knowing if the current copy of -.I gawk -on your system -is up to date with respect to whatever the Free Software Foundation -is distributing. -.TP -.B \-V -Print the GNU copyright information message on the error output. -.SH BUGS -The -.B \-F -option is not necessary given the command line variable assignment feature; -it remains only for backwards compatibility. -.SH AUTHORS -The original version of \s-1UNIX\s+1 -.I awk -was designed and implemented by Alfred Aho, -Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan -continues to maintain and enhance it. 
-.PP -Paul Rubin and Jay Fenlason, -of the Free Software Foundation, wrote -.IR gawk , -to be compatible with the original version of -.I awk -distributed in Seventh Edition \s-1UNIX\s+1. -John Woods contributed a number of bug fixes. -David Trueman of Dalhousie University, with contributions -from Arnold Robbins at Emory University, made -.I gawk -compatible with the new version of \s-1UNIX\s+1 -.IR awk . -.SH ACKNOWLEDGEMENTS -Brian Kernighan of Bell Labs -provided valuable assistance during testing and debugging. -We thank him. diff --git a/gawk.aux b/gawk.aux deleted file mode 100644 index 9c137c3b..00000000 --- a/gawk.aux +++ /dev/null @@ -1,202 +0,0 @@ -'xrdef {Preface-pg}{1} -'xrdef {Preface-snt}{} -'xrdef {History-pg}{1} -'xrdef {History-snt}{} -'xrdef {License-pg}{3} -'xrdef {License-snt}{} -'xrdef {This Manual-pg}{9} -'xrdef {This Manual-snt}{chapter'tie1} -'xrdef {The Files-pg}{9} -'xrdef {The Files-snt}{section'tie1.1} -'xrdef {Getting Started-pg}{11} -'xrdef {Getting Started-snt}{chapter'tie2} -'xrdef {Very Simple-pg}{11} -'xrdef {Very Simple-snt}{section'tie2.1} -'xrdef {Two Rules-pg}{12} -'xrdef {Two Rules-snt}{section'tie2.2} -'xrdef {More Complex-pg}{13} -'xrdef {More Complex-snt}{section'tie2.3} -'xrdef {Running gawk-pg}{14} -'xrdef {Running gawk-snt}{section'tie2.4} -'xrdef {One-shot-pg}{15} -'xrdef {One-shot-snt}{section'tie2.4.1} -'xrdef {Read Terminal-pg}{15} -'xrdef {Read Terminal-snt}{section'tie2.4.2} -'xrdef {Long-pg}{16} -'xrdef {Long-snt}{section'tie2.4.3} -'xrdef {Executable Scripts-pg}{17} -'xrdef {Executable Scripts-snt}{section'tie2.4.4} -'xrdef {Command Line-pg}{18} -'xrdef {Command Line-snt}{section'tie2.4.5} -'xrdef {Comments-pg}{19} -'xrdef {Comments-snt}{section'tie2.5} -'xrdef {Statements/Lines-pg}{20} -'xrdef {Statements/Lines-snt}{section'tie2.6} -'xrdef {When-pg}{21} -'xrdef {When-snt}{section'tie2.7} -'xrdef {Reading Files-pg}{23} -'xrdef {Reading Files-snt}{chapter'tie3} -'xrdef {Records-pg}{23} -'xrdef 
{Records-snt}{section'tie3.1} -'xrdef {Fields-pg}{24} -'xrdef {Fields-snt}{section'tie3.2} -'xrdef {Non-Constant Fields-pg}{26} -'xrdef {Non-Constant Fields-snt}{section'tie3.3} -'xrdef {Changing Fields-pg}{27} -'xrdef {Changing Fields-snt}{section'tie3.4} -'xrdef {Field Separators-pg}{28} -'xrdef {Field Separators-snt}{section'tie3.5} -'xrdef {Multiple-pg}{31} -'xrdef {Multiple-snt}{section'tie3.6} -'xrdef {Assignment Options-pg}{32} -'xrdef {Assignment Options-snt}{section'tie3.7} -'xrdef {Getline-pg}{32} -'xrdef {Getline-snt}{section'tie3.8} -'xrdef {Close Input-pg}{36} -'xrdef {Close Input-snt}{section'tie3.8.1} -'xrdef {Printing-pg}{39} -'xrdef {Printing-snt}{chapter'tie4} -'xrdef {Print-pg}{39} -'xrdef {Print-snt}{section'tie4.1} -'xrdef {Print Examples-pg}{40} -'xrdef {Print Examples-snt}{section'tie4.2} -'xrdef {Output Separators-pg}{41} -'xrdef {Output Separators-snt}{section'tie4.3} -'xrdef {Redirection-pg}{42} -'xrdef {Redirection-snt}{section'tie4.4} -'xrdef {Close Output-pg}{43} -'xrdef {Close Output-snt}{section'tie4.4.1} -'xrdef {Printf-pg}{44} -'xrdef {Printf-snt}{section'tie4.5} -'xrdef {Basic Printf-pg}{45} -'xrdef {Basic Printf-snt}{section'tie4.5.1} -'xrdef {Format-Control-pg}{45} -'xrdef {Format-Control-snt}{section'tie4.5.2} -'xrdef {Modifiers-pg}{46} -'xrdef {Modifiers-snt}{section'tie4.5.3} -'xrdef {Printf Examples-pg}{46} -'xrdef {Printf Examples-snt}{section'tie4.5.4} -'xrdef {One-liners-pg}{49} -'xrdef {One-liners-snt}{chapter'tie5} -'xrdef {Patterns-pg}{51} -'xrdef {Patterns-snt}{chapter'tie6} -'xrdef {Empty-pg}{51} -'xrdef {Empty-snt}{section'tie6.1} -'xrdef {Regexp-pg}{52} -'xrdef {Regexp-snt}{section'tie6.2} -'xrdef {Regexp Usage-pg}{52} -'xrdef {Regexp Usage-snt}{section'tie6.2.1} -'xrdef {Regexp Operators-pg}{53} -'xrdef {Regexp Operators-snt}{section'tie6.2.2} -'xrdef {Comparison Patterns-pg}{55} -'xrdef {Comparison Patterns-snt}{section'tie6.3} -'xrdef {Ranges-pg}{56} -'xrdef {Ranges-snt}{section'tie6.4} -'xrdef {BEGIN/END-pg}{57} 
-'xrdef {BEGIN/END-snt}{section'tie6.5} -'xrdef {Boolean-pg}{58} -'xrdef {Boolean-snt}{section'tie6.6} -'xrdef {Conditional Patterns-pg}{59} -'xrdef {Conditional Patterns-snt}{section'tie6.7} -'xrdef {Actions-pg}{61} -'xrdef {Actions-snt}{chapter'tie7} -'xrdef {Expressions-pg}{63} -'xrdef {Expressions-snt}{chapter'tie8} -'xrdef {Constants-pg}{63} -'xrdef {Constants-snt}{section'tie8.1} -'xrdef {Variables-pg}{64} -'xrdef {Variables-snt}{section'tie8.2} -'xrdef {Arithmetic Ops-pg}{65} -'xrdef {Arithmetic Ops-snt}{section'tie8.3} -'xrdef {Concatenation-pg}{65} -'xrdef {Concatenation-snt}{section'tie8.4} -'xrdef {Comparison Ops-pg}{66} -'xrdef {Comparison Ops-snt}{section'tie8.5} -'xrdef {Boolean Ops-pg}{67} -'xrdef {Boolean Ops-snt}{section'tie8.6} -'xrdef {Assignment Ops-pg}{68} -'xrdef {Assignment Ops-snt}{section'tie8.7} -'xrdef {Increment Ops-pg}{70} -'xrdef {Increment Ops-snt}{section'tie8.8} -'xrdef {Conversion-pg}{71} -'xrdef {Conversion-snt}{section'tie8.9} -'xrdef {Conditional Exp-pg}{72} -'xrdef {Conditional Exp-snt}{section'tie8.10} -'xrdef {Function Calls-pg}{73} -'xrdef {Function Calls-snt}{section'tie8.11} -'xrdef {Statements-pg}{75} -'xrdef {Statements-snt}{chapter'tie9} -'xrdef {If-pg}{75} -'xrdef {If-snt}{section'tie9.1} -'xrdef {While-pg}{76} -'xrdef {While-snt}{section'tie9.2} -'xrdef {Do-pg}{77} -'xrdef {Do-snt}{section'tie9.3} -'xrdef {For-pg}{77} -'xrdef {For-snt}{section'tie9.4} -'xrdef {Break-pg}{79} -'xrdef {Break-snt}{section'tie9.5} -'xrdef {Continue-pg}{80} -'xrdef {Continue-snt}{section'tie9.6} -'xrdef {Next-pg}{81} -'xrdef {Next-snt}{section'tie9.7} -'xrdef {Exit-pg}{82} -'xrdef {Exit-snt}{section'tie9.8} -'xrdef {Arrays-pg}{83} -'xrdef {Arrays-snt}{chapter'tie10} -'xrdef {Array Intro-pg}{83} -'xrdef {Array Intro-snt}{section'tie10.1} -'xrdef {Reference to Elements-pg}{85} -'xrdef {Reference to Elements-snt}{section'tie10.2} -'xrdef {Assigning Elements-pg}{86} -'xrdef {Assigning Elements-snt}{section'tie10.3} -'xrdef {Array 
Example-pg}{86} -'xrdef {Array Example-snt}{section'tie10.4} -'xrdef {Scanning an Array-pg}{87} -'xrdef {Scanning an Array-snt}{section'tie10.5} -'xrdef {Delete-pg}{88} -'xrdef {Delete-snt}{section'tie10.6} -'xrdef {Multi-dimensional-pg}{89} -'xrdef {Multi-dimensional-snt}{section'tie10.7} -'xrdef {Multi-scanning-pg}{91} -'xrdef {Multi-scanning-snt}{section'tie10.8} -'xrdef {Built-in-pg}{93} -'xrdef {Built-in-snt}{chapter'tie11} -'xrdef {Numeric Functions-pg}{93} -'xrdef {Numeric Functions-snt}{section'tie11.1} -'xrdef {String Functions-pg}{95} -'xrdef {String Functions-snt}{section'tie11.2} -'xrdef {I/O Functions-pg}{98} -'xrdef {I/O Functions-snt}{section'tie11.3} -'xrdef {User-defined-pg}{99} -'xrdef {User-defined-snt}{chapter'tie12} -'xrdef {Definition Syntax-pg}{99} -'xrdef {Definition Syntax-snt}{section'tie12.1} -'xrdef {Function Example-pg}{100} -'xrdef {Function Example-snt}{section'tie12.2} -'xrdef {Function Caveats-pg}{101} -'xrdef {Function Caveats-snt}{section'tie12.3} -'xrdef {Return Statement-pg}{102} -'xrdef {Return Statement-snt}{section'tie12.4} -'xrdef {Special-pg}{105} -'xrdef {Special-snt}{chapter'tie13} -'xrdef {User-modified-pg}{105} -'xrdef {User-modified-snt}{section'tie13.1} -'xrdef {Auto-set-pg}{106} -'xrdef {Auto-set-snt}{section'tie13.2} -'xrdef {Sample Program-pg}{109} -'xrdef {Sample Program-snt}{} -'xrdef {Notes-pg}{111} -'xrdef {Notes-snt}{} -'xrdef {Extensions-pg}{111} -'xrdef {Extensions-snt}{} -'xrdef {Future Extensions-pg}{111} -'xrdef {Future Extensions-snt}{} -'xrdef {Improvements-pg}{112} -'xrdef {Improvements-snt}{} -'xrdef {Manual Improvements-pg}{113} -'xrdef {Manual Improvements-snt}{} -'xrdef {Glossary-pg}{115} -'xrdef {Glossary-snt}{} -'xrdef {Index-pg}{119} -'xrdef {Index-snt}{} diff --git a/gawk.cp b/gawk.cp deleted file mode 100644 index 7ff13135..00000000 --- a/gawk.cp +++ /dev/null @@ -1,234 +0,0 @@ -\entry {What is awk}{1}{What is {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Uses of 
awk}{1}{Uses of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Acronym}{1}{Acronym} -\entry {History of awk}{1}{History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Manual, using this}{9}{Manual, using this} -\entry {Using this manual}{9}{Using this manual} -\entry {Language, awk}{9}{Language, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Program, awk}{9}{Program, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {awk language}{9}{{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} language} -\entry {awk program}{9}{{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program} -\entry {Input file, sample}{9}{Input file, sample} -\entry {Sample input file}{9}{Sample input file} -\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file}{9}{`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file} -\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file}{10}{`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file} -\entry {Script, definition of}{11}{Script, definition of} -\entry {Rule, definition of}{11}{Rule, definition of} -\entry {Pattern, definition of}{11}{Pattern, definition of} -\entry {Action, definition of}{11}{Action, definition of} -\entry {Program, definition of}{11}{Program, definition of} -\entry {Basic function of gawk}{11}{Basic function of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}} -\entry {Action, curly braces}{11}{Action, curly braces} -\entry {Curly braces}{11}{Curly braces} -\entry {print $0}{11}{{\fam \ttfam \tentt \rawbackslash \frenchspacing print $0}\hbox {}} -\entry {Action, default}{11}{Action, default} -\entry {Pattern, default}{11}{Pattern, default} -\entry {Default action}{11}{Default action} -\entry {Default pattern}{11}{Default pattern} -\entry {How gawk works}{12}{How gawk works} -\entry 
{Command line formats}{14}{Command line formats} -\entry {Running gawk programs}{14}{Running gawk programs} -\entry {Single quotes, why they are needed}{15}{Single quotes, why they are needed} -\entry {Standard input}{15}{Standard input} -\entry {Input, standard}{15}{Input, standard} -\entry {Case sensitivity and gawk}{16}{Case sensitivity and gawk} -\entry {Pattern, case sensitive}{16}{Pattern, case sensitive} -\entry {running long programs}{16}{running long programs} -\entry {-f option}{16}{-f option} -\entry {program file}{16}{program file} -\entry {file, awk program}{16}{file, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program} -\entry {Executable Scripts}{17}{Executable Scripts} -\entry {Scripts, Executable}{17}{Scripts, Executable} -\entry {Self contained Programs}{17}{Self contained Programs} -\entry {Program, Self contained}{17}{Program, Self contained} -\entry {#!}{17}{#!} -\entry {Shell Scripts}{17}{Shell Scripts} -\entry {Scripts, Shell}{17}{Scripts, Shell} -\entry {Command Line}{18}{Command Line} -\entry {Invocation of gawk}{18}{Invocation of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}} -\entry {Arguments, Command Line}{18}{Arguments, Command Line} -\entry {Options, Command Line}{18}{Options, Command Line} -\entry {Multiple passes over data}{19}{Multiple passes over data} -\entry {Passes, Multiple}{19}{Passes, Multiple} -\entry {Comments}{19}{Comments} -\entry {Use of comments}{19}{Use of comments} -\entry {Documenting awk programs}{19}{Documenting {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} programs} -\entry {Programs, documenting}{19}{Programs, documenting} -\entry {Backslash Continuation}{20}{Backslash Continuation} -\entry {Continuing statements on the next line}{20}{Continuing statements on the next line} -\entry {Multiple statements on one line}{21}{Multiple statements on one line} -\entry {When to use awk}{21}{When to use {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry 
{Applications of awk}{21}{Applications of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Emacs Lisp}{21}{Emacs Lisp} -\entry {Reading files, general}{23}{Reading files, general} -\entry {Input, general}{23}{Input, general} -\entry {Standard input}{23}{Standard input} -\entry {Input, standard}{23}{Input, standard} -\entry {General input}{23}{General input} -\entry {Record separator, RS}{23}{Record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}} -\entry {Changing the record separator}{23}{Changing the record separator} -\entry {Number of records, NR}{24}{Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}} -\entry {Number of records, FNR}{24}{Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}} -\entry {Examining fields}{24}{Examining fields} -\entry {Fields}{24}{Fields} -\entry {Accessing fields}{24}{Accessing fields} -\entry {$ (field operator)}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {} (field operator)} -\entry {Operators, $}{24}{Operators, {\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {}} -\entry {$NF, last field in record}{25}{{\fam \ttfam \tentt \rawbackslash \frenchspacing $NF}\hbox {}, last field in record} -\entry {Number of fields, NF}{25}{Number of fields, {\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}} -\entry {Fields, negative-numbered}{26}{Fields, negative-numbered} -\entry {Negative-numbered fields}{26}{Negative-numbered fields} -\entry {Field, changing contents of}{27}{Field, changing contents of} -\entry {Changing contents of a field}{27}{Changing contents of a field} -\entry {Fields, semantics of}{28}{Fields, semantics of} -\entry {Fields, separating}{28}{Fields, separating} -\entry {Field separator, FS}{28}{Field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}} -\entry {Separator character, choice of}{29}{Separator character, choice of} -\entry {Field separator, choice 
of}{29}{Field separator, choice of} -\entry {Regular expressions, field separators and}{29}{Regular expressions, field separators and} -\entry {Field separator, setting on command line}{29}{Field separator, setting on command line} -\entry {Command line, setting FS on}{29}{Command line, setting {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {} on} -\entry {Multiple line records}{31}{Multiple line records} -\entry {Input, multiple line records}{31}{Input, multiple line records} -\entry {Reading files, multiple line records}{31}{Reading files, multiple line records} -\entry {Records, multiple line}{31}{Records, multiple line} -\entry {Input, getline function}{32}{Input, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function} -\entry {Reading files, getline function}{32}{Reading files, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function} -\entry {close statement for input}{36}{{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for input} -\entry {Printing, general}{39}{Printing, general} -\entry {Output}{39}{Output} -\entry {print statement}{39}{{\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} statement} -\entry {Output field separator, OFS}{41}{Output field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}} -\entry {Output record separator, ORS}{41}{Output record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}} -\entry {Output redirection}{42}{Output redirection} -\entry {Redirection of output}{42}{Redirection of output} -\entry {{\fam \ttfam \tentt \gtr }}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }}\hbox {}} -\entry {{\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}\hbox {}} -\entry {{\fam \ttfam \tentt \char '174}}{42}{{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam 
\tentt \char '174}}\hbox {}} -\entry {Pipes for output}{42}{Pipes for output} -\entry {Output, piping}{42}{Output, piping} -\entry {close statement for output}{43}{{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for output} -\entry {Closing files and pipes}{43}{Closing files and pipes} -\entry {Formatted output}{44}{Formatted output} -\entry {Output, formatted}{44}{Output, formatted} -\entry {printf statement, format of}{45}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} statement, format of} -\entry {Format string}{45}{Format string} -\entry {printf, format-control characters}{45}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, format-control characters} -\entry {Format specifier}{45}{Format specifier} -\entry {printf, modifiers}{46}{{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, modifiers} -\entry {Modifiers (in format specifiers)}{46}{Modifiers (in format specifiers)} -\entry {One-liners}{49}{One-liners} -\entry {Patterns, definition of}{51}{Patterns, definition of} -\entry {Patterns, types of}{51}{Patterns, types of} -\entry {Empty pattern}{51}{Empty pattern} -\entry {Pattern, empty}{51}{Pattern, empty} -\entry {Pattern, regular expressions}{52}{Pattern, regular expressions} -\entry {Regexp}{52}{Regexp} -\entry {Regular expressions as patterns}{52}{Regular expressions as patterns} -\entry {Regular expression matching operators}{52}{Regular expression matching operators} -\entry {String-matching operators}{52}{String-matching operators} -\entry {Operators, string-matching}{52}{Operators, string-matching} -\entry {Operators, regular expression matching}{52}{Operators, regular expression matching} -\entry {regexp search operators}{52}{regexp search operators} -\entry {Computed Regular Expressions}{52}{Computed Regular Expressions} -\entry {Regular Expressions, Computed}{52}{Regular Expressions, Computed} -\entry {Dynamic Regular Expressions}{52}{Dynamic Regular Expressions} -\entry 
{Regular Expressions, Dynamic}{52}{Regular Expressions, Dynamic} -\entry {Metacharacters}{53}{Metacharacters} -\entry {Regular expression, metacharacters}{53}{Regular expression, metacharacters} -\entry {Comparison expressions as patterns}{55}{Comparison expressions as patterns} -\entry {Pattern, comparison expressions}{55}{Pattern, comparison expressions} -\entry {Relational operators}{55}{Relational operators} -\entry {Operators, relational}{55}{Operators, relational} -\entry {Range pattern}{56}{Range pattern} -\entry {patterns, range}{56}{patterns, range} -\entry {BEGIN, special pattern}{57}{{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}, special pattern} -\entry {Patterns, BEGIN}{57}{Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}} -\entry {END, special pattern}{57}{{\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}, special pattern} -\entry {Patterns, END}{57}{Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}} -\entry {Patterns, boolean}{58}{Patterns, boolean} -\entry {Boolean patterns}{58}{Boolean patterns} -\entry {Conditional Patterns}{59}{Conditional Patterns} -\entry {Patterns, Conditional}{59}{Patterns, Conditional} -\entry {Ternary Operator}{59}{Ternary Operator} -\entry {Operator, Ternary}{59}{Operator, Ternary} -\entry {Action, general}{61}{Action, general} -\entry {Curly braces}{61}{Curly braces} -\entry {Action, curly braces}{61}{Action, curly braces} -\entry {Action, separating statements}{61}{Action, separating statements} -\entry {Expressions}{61}{Expressions} -\entry {Statements}{61}{Statements} -\entry {Compound statements}{61}{Compound statements} -\entry {Function definitions}{61}{Function definitions} -\entry {Constants, types of}{63}{Constants, types of} -\entry {String constants}{63}{String constants} -\entry {String value}{63}{String value} -\entry {Numerical constant}{63}{Numerical constant} -\entry {Numerical value}{63}{Numerical value} -\entry {Differences 
between gawk and awk}{63}{Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {Escape sequence notation}{63}{Escape sequence notation} -\entry {Variables, user-defined}{64}{Variables, user-defined} -\entry {User-defined variables}{64}{User-defined variables} -\entry {Built-in variables}{64}{Built-in variables} -\entry {Variables, built-in}{64}{Variables, built-in} -\entry {Arithmetic operators}{65}{Arithmetic operators} -\entry {Operators, arithmetic}{65}{Operators, arithmetic} -\entry {Mod function, semantics of}{65}{Mod function, semantics of} -\entry {Differences between gawk and awk}{65}{Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}} -\entry {String operators}{65}{String operators} -\entry {Operators, string}{65}{Operators, string} -\entry {Concatenation}{65}{Concatenation} -\entry {Comparison expressions}{66}{Comparison expressions} -\entry {Expressions, comparison}{66}{Expressions, comparison} -\entry {Relational operators}{66}{Relational operators} -\entry {Operators, relational}{66}{Operators, relational} -\entry {Expressions, boolean}{67}{Expressions, boolean} -\entry {Boolean expressions}{67}{Boolean expressions} -\entry {Operators, boolean}{67}{Operators, boolean} -\entry {Boolean operators}{67}{Boolean operators} -\entry {Assignment operators}{68}{Assignment operators} -\entry {Operators, assignment}{68}{Operators, assignment} -\entry {Lvalue}{68}{Lvalue} -\entry {Increment operators}{70}{Increment operators} -\entry {Operators, increment}{70}{Operators, increment} -\entry {Conversion of strings and numbers}{71}{Conversion of strings and numbers} -\entry {Conditional expression}{72}{Conditional expression} -\entry {Expression, conditional}{72}{Expression, conditional} -\entry {Function call}{73}{Function call} -\entry {Calling a 
function}{73}{Calling a function} -\entry {Arguments in function call}{73}{Arguments in function call} -\entry {Statements}{75}{Statements} -\entry {if statement}{75}{{\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} statement} -\entry {while statement}{76}{{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} statement} -\entry {Loop}{76}{Loop} -\entry {Body of a loop}{76}{Body of a loop} -\entry {for statement}{77}{{\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement} -\entry {break statement}{79}{{\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} statement} -\entry {Loops, breaking out of}{79}{Loops, breaking out of} -\entry {continue statement}{80}{{\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} statement} -\entry {next statement}{81}{{\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} statement} -\entry {exit statement}{82}{{\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} statement} -\entry {Arrays}{83}{Arrays} -\entry {Arrays, definition of}{83}{Arrays, definition of} -\entry {Associative arrays}{83}{Associative arrays} -\entry {Array reference}{85}{Array reference} -\entry {Element of array}{85}{Element of array} -\entry {Reference to array}{85}{Reference to array} -\entry {Arrays, determining presence of elements}{85}{Arrays, determining presence of elements} -\entry {Array assignment}{86}{Array assignment} -\entry {Element assignment}{86}{Element assignment} -\entry {for (x in \dots {})}{87}{{\fam \ttfam \tentt \rawbackslash \frenchspacing for (x in \dots {})}\hbox {}} -\entry {Arrays, special for statement}{87}{Arrays, special {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement} -\entry {Scanning an array}{87}{Scanning an array} -\entry {delete statement}{88}{{\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} statement} -\entry {Deleting elements of arrays}{88}{Deleting elements of arrays} -\entry {Removing elements of 
arrays}{88}{Removing elements of arrays} -\entry {Arrays, deleting an element}{88}{Arrays, deleting an element} -\entry {Subscripts, multi-dimensional in arrays}{89}{Subscripts, multi-dimensional in arrays} -\entry {Arrays, multi-dimensional subscripts}{89}{Arrays, multi-dimensional subscripts} -\entry {Built-in functions, list of}{93}{Built-in functions, list of} -\entry {Interaction of awk with other programs}{98}{Interaction of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} with other programs} -\entry {User-defined functions}{99}{User-defined functions} -\entry {Functions, user-defined}{99}{Functions, user-defined} -\entry {return statement}{102}{{\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement} -\entry {Special variables, user modifiable}{105}{Special variables, user modifiable} diff --git a/gawk.cps b/gawk.cps deleted file mode 100644 index 9c5c7c44..00000000 --- a/gawk.cps +++ /dev/null @@ -1,253 +0,0 @@ -\initial {#} -\entry {#!}{17} -\initial {$} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {} (field operator)}{25} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing $NF}\hbox {}, last field in record}{25} -\initial {-} -\entry {-f option}{16} -\initial {`} -\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing BBS-list}'\hbox {} file}{9} -\entry {`{\fam \ttfam \tentt \rawbackslash \frenchspacing inventory-shipped}'\hbox {} file}{10} -\initial {{\fam \ttfam \tentt \char '174}} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \char '174}}\hbox {}}{42} -\initial {{\fam \ttfam \tentt \gtr }} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }}\hbox {}}{42} -\initial {{\fam \ttfam \tentt \gtr }} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing {\fam \ttfam \tentt \gtr }{\fam \ttfam \tentt \gtr }}\hbox {}}{42} -\initial {A} -\entry {Accessing fields}{24} -\entry {Acronym}{1} -\entry {Action, curly braces}{11, 61} -\entry 
{Action, default}{12} -\entry {Action, definition of}{11} -\entry {Action, general}{61} -\entry {Action, separating statements}{61} -\entry {Applications of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{21} -\entry {Arguments in function call}{73} -\entry {Arguments, Command Line}{18} -\entry {Arithmetic operators}{65} -\entry {Array assignment}{86} -\entry {Array reference}{85} -\entry {Arrays}{83} -\entry {Arrays, definition of}{83} -\entry {Arrays, deleting an element}{88} -\entry {Arrays, determining presence of elements}{85} -\entry {Arrays, multi-dimensional subscripts}{89} -\entry {Arrays, special {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}{87} -\entry {Assignment operators}{68} -\entry {Associative arrays}{83} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} language}{9} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}{9} -\initial {B} -\entry {Backslash Continuation}{20} -\entry {Basic function of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{11} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}, special pattern}{57} -\entry {Body of a loop}{76} -\entry {Boolean expressions}{67} -\entry {Boolean operators}{67} -\entry {Boolean patterns}{58} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} statement}{79} -\entry {Built-in functions, list of}{93} -\entry {Built-in variables}{64} -\initial {C} -\entry {Calling a function}{73} -\entry {Case sensitivity and gawk}{16} -\entry {Changing contents of a field}{27} -\entry {Changing the record separator}{23} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for input}{36} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing close}\hbox {} statement for output}{43} -\entry {Closing files and pipes}{43} -\entry {Command Line}{18} -\entry {Command line formats}{14} -\entry {Command line, setting {\fam \ttfam \tentt 
\rawbackslash \frenchspacing FS}\hbox {} on}{29} -\entry {Comments}{19} -\entry {Comparison expressions}{66} -\entry {Comparison expressions as patterns}{55} -\entry {Compound statements}{61} -\entry {Computed Regular Expressions}{52} -\entry {Concatenation}{65} -\entry {Conditional expression}{72} -\entry {Conditional Patterns}{59} -\entry {Constants, types of}{63} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} statement}{80} -\entry {Continuing statements on the next line}{20} -\entry {Conversion of strings and numbers}{71} -\entry {Curly braces}{11, 61} -\initial {D} -\entry {Default action}{12} -\entry {Default pattern}{12} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} statement}{88} -\entry {Deleting elements of arrays}{88} -\entry {Differences between {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{63, 65} -\entry {Documenting {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} programs}{19} -\entry {Dynamic Regular Expressions}{52} -\initial {E} -\entry {Element assignment}{86} -\entry {Element of array}{85} -\entry {Emacs Lisp}{21} -\entry {Empty pattern}{51} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}, special pattern}{57} -\entry {Escape sequence notation}{63} -\entry {Examining fields}{24} -\entry {Executable Scripts}{17} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} statement}{82} -\entry {Expression, conditional}{72} -\entry {Expressions}{61} -\entry {Expressions, boolean}{67} -\entry {Expressions, comparison}{66} -\initial {F} -\entry {Field separator, choice of}{29} -\entry {Field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}{28} -\entry {Field separator, setting on command line}{29} -\entry {Field, changing contents of}{27} -\entry {Fields}{24} -\entry {Fields, negative-numbered}{26} -\entry {Fields, semantics of}{28} 
-\entry {Fields, separating}{28} -\entry {file, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} program}{16} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing for (x in \dots {})}\hbox {}}{87} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} statement}{77} -\entry {Format specifier}{45} -\entry {Format string}{45} -\entry {Formatted output}{44} -\entry {Function call}{73} -\entry {Function definitions}{61} -\entry {Functions, user-defined}{99} -\initial {G} -\entry {General input}{23} -\initial {H} -\entry {History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1} -\entry {How gawk works}{12} -\initial {I} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} statement}{75} -\entry {Increment operators}{70} -\entry {Input file, sample}{9} -\entry {Input, general}{23} -\entry {Input, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}{32} -\entry {Input, multiple line records}{31} -\entry {Input, standard}{15, 23} -\entry {Interaction of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} with other programs}{98} -\entry {Invocation of {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{18} -\initial {L} -\entry {Language, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{9} -\entry {Loop}{76} -\entry {Loops, breaking out of}{79} -\entry {Lvalue}{68} -\initial {M} -\entry {Manual, using this}{9} -\entry {Metacharacters}{53} -\entry {Mod function, semantics of}{65} -\entry {Modifiers (in format specifiers)}{46} -\entry {Multiple line records}{31} -\entry {Multiple passes over data}{19} -\entry {Multiple statements on one line}{21} -\initial {N} -\entry {Negative-numbered fields}{26} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} statement}{81} -\entry {Number of fields, {\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}{25} -\entry {Number of records, {\fam \ttfam \tentt \rawbackslash 
\frenchspacing FNR}\hbox {}}{24} -\entry {Number of records, {\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}{24} -\entry {Numerical constant}{63} -\entry {Numerical value}{63} -\initial {O} -\entry {One-liners}{49} -\entry {Operator, Ternary}{59} -\entry {Operators, {\fam \ttfam \tentt \rawbackslash \frenchspacing $}\hbox {}}{25} -\entry {Operators, arithmetic}{65} -\entry {Operators, assignment}{68} -\entry {Operators, boolean}{67} -\entry {Operators, increment}{70} -\entry {Operators, regular expression matching}{52} -\entry {Operators, relational}{55, 66} -\entry {Operators, string}{65} -\entry {Operators, string-matching}{52} -\entry {Options, Command Line}{18} -\entry {Output}{39} -\entry {Output field separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}{41} -\entry {Output record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}{41} -\entry {Output redirection}{42} -\entry {Output, formatted}{44} -\entry {Output, piping}{42} -\initial {P} -\entry {Passes, Multiple}{19} -\entry {Pattern, case sensitive}{16} -\entry {Pattern, comparison expressions}{55} -\entry {Pattern, default}{12} -\entry {Pattern, definition of}{11} -\entry {Pattern, empty}{51} -\entry {Pattern, regular expressions}{52} -\entry {Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {}}{57} -\entry {Patterns, boolean}{58} -\entry {Patterns, Conditional}{59} -\entry {Patterns, definition of}{51} -\entry {Patterns, {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {}}{57} -\entry {patterns, range}{56} -\entry {Patterns, types of}{51} -\entry {Pipes for output}{42} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing print $0}\hbox {}}{11} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} statement}{39} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} statement, format of}{45} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, 
format-control characters}{45} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}, modifiers}{46} -\entry {Printing, general}{39} -\entry {program file}{16} -\entry {Program, {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{9} -\entry {Program, definition of}{11} -\entry {Program, Self contained}{17} -\entry {Programs, documenting}{19} -\initial {R} -\entry {Range pattern}{56} -\entry {Reading files, general}{23} -\entry {Reading files, {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {} function}{32} -\entry {Reading files, multiple line records}{31} -\entry {Record separator, {\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}{23} -\entry {Records, multiple line}{31} -\entry {Redirection of output}{42} -\entry {Reference to array}{85} -\entry {Regexp}{52} -\entry {regexp search operators}{52} -\entry {Regular expression matching operators}{52} -\entry {Regular expression, metacharacters}{53} -\entry {Regular expressions as patterns}{52} -\entry {Regular Expressions, Computed}{52} -\entry {Regular Expressions, Dynamic}{52} -\entry {Regular expressions, field separators and}{29} -\entry {Relational operators}{55, 66} -\entry {Removing elements of arrays}{88} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement}{102} -\entry {Rule, definition of}{11} -\entry {Running gawk programs}{14} -\entry {running long programs}{16} -\initial {S} -\entry {Sample input file}{9} -\entry {Scanning an array}{87} -\entry {Script, definition of}{11} -\entry {Scripts, Executable}{17} -\entry {Scripts, Shell}{17} -\entry {Self contained Programs}{17} -\entry {Separator character, choice of}{29} -\entry {Shell Scripts}{17} -\entry {Single quotes, why they are needed}{15} -\entry {Special variables, user modifiable}{105} -\entry {Standard input}{15, 23} -\entry {Statements}{61, 75} -\entry {String constants}{63} -\entry {String operators}{65} -\entry {String value}{63} -\entry {String-matching 
operators}{52} -\entry {Subscripts, multi-dimensional in arrays}{89} -\initial {T} -\entry {Ternary Operator}{59} -\initial {U} -\entry {Use of comments}{19} -\entry {User-defined functions}{99} -\entry {User-defined variables}{64} -\entry {Uses of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1} -\entry {Using this manual}{9} -\initial {V} -\entry {Variables, built-in}{64} -\entry {Variables, user-defined}{64} -\initial {W} -\entry {What is {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{1} -\entry {When to use {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{21} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} statement}{76} diff --git a/gawk.dvi b/gawk.dvi Binary files differdeleted file mode 100644 index 8c863aa4..00000000 --- a/gawk.dvi +++ /dev/null diff --git a/gawk.fn b/gawk.fn deleted file mode 100644 index d45d54e7..00000000 --- a/gawk.fn +++ /dev/null @@ -1,10 +0,0 @@ -\entry {getline}{32}{{\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {}} -\entry {match}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}} -\entry {length}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing length}\hbox {}} -\entry {match}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}} -\entry {split}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing split}\hbox {}} -\entry {sprintf}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing sprintf}\hbox {}} -\entry {sub}{96}{{\fam \ttfam \tentt \rawbackslash \frenchspacing sub}\hbox {}} -\entry {gsub}{97}{{\fam \ttfam \tentt \rawbackslash \frenchspacing gsub}\hbox {}} -\entry {substr}{97}{{\fam \ttfam \tentt \rawbackslash \frenchspacing substr}\hbox {}} -\entry {system}{98}{{\fam \ttfam \tentt \rawbackslash \frenchspacing system}\hbox {}} diff --git a/gawk.fns b/gawk.fns deleted file mode 100644 index bfd931c1..00000000 --- a/gawk.fns +++ /dev/null @@ -1,13 +0,0 @@ -\initial {G} -\entry {{\fam \ttfam \tentt 
\rawbackslash \frenchspacing getline}\hbox {}}{32} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing gsub}\hbox {}}{97} -\initial {L} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing length}\hbox {}}{95} -\initial {M} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing match}\hbox {}}{95} -\initial {S} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing split}\hbox {}}{96} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing sprintf}\hbox {}}{96} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing sub}\hbox {}}{96} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing substr}\hbox {}}{97} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing system}\hbox {}}{98} diff --git a/gawk.ky b/gawk.ky deleted file mode 100644 index e69de29b..00000000 --- a/gawk.ky +++ /dev/null diff --git a/gawk.kys b/gawk.kys deleted file mode 100644 index e69de29b..00000000 --- a/gawk.kys +++ /dev/null diff --git a/gawk.pg b/gawk.pg deleted file mode 100644 index e69de29b..00000000 --- a/gawk.pg +++ /dev/null diff --git a/gawk.pgs b/gawk.pgs deleted file mode 100644 index e69de29b..00000000 --- a/gawk.pgs +++ /dev/null diff --git a/gawk.texinfo b/gawk.texinfo deleted file mode 100644 index 4c22e8ad..00000000 --- a/gawk.texinfo +++ /dev/null @@ -1,6587 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header (This is for running Texinfo on a region.) -@setfilename gawk-info -@settitle The GAWK Manual -@c %**end of header (This is for running Texinfo on a region.) - -@iftex -@finalout -@end iftex - -@ifinfo -This file documents @code{awk}, a program that you can use to select -particular records in a file and perform operations upon them. - -Copyright (C) 1989 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). - -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. -@end ifinfo - -@setchapternewpage odd -@titlepage -@sp 11 -@center @titlefont{The GAWK Manual} -@sp 4 -@center by Diane Barlow Close and Richard Stallman -@center with Paul H. Rubin -@center and Arnold D. Robbins -@sp 2 -@center Edition 0.1 Beta -@sp 2 -@center March 1989 - -@c Include the Distribution inside the titlepage environment so -@c that headings are turned off. Headings on and off do not work. - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1989 Free Software Foundation, Inc. -@sp 2 - -This is Edition 0.1 Beta of @cite{The GAWK Manual}, @* -for the 2.02 Beta, 23 December 1988, version @* -of the GNU implementation of AWK. - -@sp 2 -Published by the Free Software Foundation @* -675 Massachusetts Avenue, @* -Cambridge, MA 02139 USA @* -Printed copies are available for $10 each. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. -@end titlepage - -@node Top, Preface, (dir), (dir) -@comment node-name, next, previous, up -@c Preface or Licensing nodes should come right after the Top -@c node, in `unnumbered' sections, then the chapter, `What is gawk'. - -@ifinfo -This file documents @code{awk}, a program that you can use to select -particular records in a file and perform operations upon them; it -contains the following chapters: -@end ifinfo - -@menu -* Preface:: What you can do with @code{awk}; brief history - and acknowledgements. - -* License:: Your right to copy and distribute @code{gawk}. - -* This Manual:: Using this manual. -@ifinfo - Includes sample input files that you can use. -@end ifinfo -* Getting Started:: A basic introduction to using @code{awk}. - How to run an @code{awk} program. Command line syntax. - -* Reading Files:: How to read files and manipulate fields. - -* Printing:: How to print using @code{awk}. Describes the - @code{print} and @code{printf} statements. - Also describes redirection of output. - -* One-liners:: Short, sample @code{awk} programs. - -* Patterns:: The various types of patterns explained in detail. - -* Actions:: The various types of actions are introduced here. - Describes expressions and the various operators in - detail. Also describes comparison expressions. - -* Statements:: The various control statements are described in - detail. - -* Arrays:: The description and use of arrays. Also includes - array--oriented control statements. 
- -* User-defined:: User--defined functions are described in detail. - -* Built-in:: The built--in functions are summarized here. - -* Special:: The special variables are summarized here. - -* Sample Program:: A sample @code{awk} program with a complete explanation. - -* Notes:: Something about the implementation of @code{gawk}. - -* Glossary:: An explanation of some unfamiliar terms. - -* Index:: -@end menu - - -@node Preface, License, Top , Top -@comment node-name, next, previous, up -@unnumbered Preface - -@cindex What is @code{awk} -If you are like many computer users, you frequently would like to make -changes in various text files wherever certain patterns appear, or -extract data from parts of certain lines while discarding the rest. To -write a program to do this in a language such as C or Pascal is a -time--consuming inconvenience that may take many lines of code. The job -may be easier with @code{awk}. - -The @code{awk} utility interprets a special--purpose programming language -that makes it possible to handle simple data--reformatting jobs easily -with just a few lines of code. - -The GNU implementation of @code{awk} is called @code{gawk}; it is fully -upward compatible with the System V Release 3.1 and later -version of @code{awk}. All properly written -@code{awk} programs should work with @code{gawk}. So we usually don't -distinguish between @code{gawk} and other @code{awk} implementations in -this manual.@refill - -@cindex Uses of @code{awk} -This manual teaches you what @code{awk} does and how you can use -@code{awk} effectively. You should already be familiar with basic, -general--purpose, operating system commands such as @code{ls}. Using -@code{awk} you can: @refill - -@itemize @bullet -@item -manage small, personal databases, - -@item -generate reports, - -@item -validate data, -@item -produce indexes, and perform other document preparation tasks, - -@item -even experiment with algorithms that can be adapted later to other computer -languages! 
-@end itemize - -@menu -* History:: The history of gawk and awk. Acknowledgements. -@end menu - -@node History, , , Preface -@comment node-name, next, previous, up -@unnumberedsec History of @code{awk} and @code{gawk} - -@cindex Acronym -@cindex History of @code{awk} -The name @code{awk} comes from the initials of its designers: Alfred V. -Aho, Peter J. Weinberger, and Brian W. Kernighan. The original version of -@code{awk} was written in 1977. In 1985 a new version made the programming -language more powerful, introducing user--defined functions, multiple input -streams, and computed regular expressions. -@comment We don't refer people to non-free information -@comment In 1988, the original authors -@comment published @cite{The AWK Programming Language} (Addison-Wesley, ISBN -@comment 0-201-07981-X), as a definitive description of the @code{awk} language. - -The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin -and Jay Fenlason, with advice from Richard Stallman. John Woods -contributed parts of the code as well. In 1988, David Trueman, with -help from Arnold Robbins, reworked @code{gawk} for compatibility with -the newer @code{awk}. - -Many people need to be thanked for their assistance in producing this -manual. Jay Fenlason contributed many ideas and sample programs. Richard -Mlynarik and Robert Chassell gave helpful comments on drafts of this -manual. The paper @cite{A Supplemental Document for @code{awk}} by John W. -Pierce of the Chemistry Department at UC San Diego, pinpointed several -issues relevant both to @code{awk} implementation and to this manual, that -would otherwise have escaped us. 
- -Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable -assistance during the testing and debugging of @code{gawk}, and for -help in clarifying several points about the language.@refill - -@node License, This Manual, Preface, Top -@unnumbered GNU GENERAL PUBLIC LICENSE -@center Version 1, February 1989 - -@display -Copyright @copyright{} 1989 Free Software Foundation, Inc. -675 Mass Ave, Cambridge, MA 02139, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@unnumberedsec Preamble - - The license agreements of most software companies try to keep users -at the mercy of those companies. By contrast, our General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. The -General Public License applies to the Free Software Foundation's -software and to any other program whose authors commit to using it. -You can use it for your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Specifically, the General Public License is designed to make -sure that you have the freedom to give away or sell copies of free -software, that you receive source code or can get it if you want it, -that you can change the software or use pieces of it in new free -programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of a such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must tell them their rights. 
- - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - The precise terms and conditions for copying, distribution and -modification follow. - -@iftex -@unnumberedsec TERMS AND CONDITIONS -@end iftex -@ifinfo -@center TERMS AND CONDITIONS -@end ifinfo - -@enumerate -@item -This License Agreement applies to any program or other work which -contains a notice placed by the copyright holder saying it may be -distributed under the terms of this General Public License. The -``Program'', below, refers to any such program or work, and a ``work based -on the Program'' means either the Program or any work containing the -Program or a portion of it, either verbatim or with modifications. Each -licensee is addressed as ``you''. - -@item -You may copy and distribute verbatim copies of the Program's source -code as you receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice and -disclaimer of warranty; keep intact all the notices that refer to this -General Public License and to the absence of any warranty; and give any -other recipients of the Program a copy of this General Public License -along with the Program. You may charge a fee for the physical act of -transferring a copy. 
- -@item -You may modify your copy or copies of the Program or any portion of -it, and copy and distribute such modifications under the terms of Paragraph -1 above, provided that you also do the following: - -@itemize @bullet -@item -cause the modified files to carry prominent notices stating that -you changed the files and the date of any change; and - -@item -cause the whole of any work that you distribute or publish, that -in whole or in part contains the Program or any part thereof, either -with or without modifications, to be licensed at no charge to all -third parties under the terms of this General Public License (except -that you may choose to grant warranty protection to some or all -third parties, at your option). - -@item -If the modified program normally reads commands interactively when -run, you must cause it, when started running for such interactive use -in the simplest and most usual way, to print or display an -announcement including an appropriate copyright notice and a notice -that there is no warranty (or else, saying that you provide a -warranty) and that users may redistribute the program under these -conditions, and telling the user how to view a copy of this General -Public License. - -@item -You may charge a fee for the physical act of transferring a -copy, and you may at your option offer warranty protection in -exchange for a fee. -@end itemize - -Mere aggregation of another independent work with the Program (or its -derivative) on a volume of a storage or distribution medium does not bring -the other work under the scope of these terms. 
- -@item -You may copy and distribute the Program (or a portion or derivative of -it, under Paragraph 2) in object code or executable form under the terms of -Paragraphs 1 and 2 above provided that you also do one of the following: - -@itemize @bullet -@item -accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of -Paragraphs 1 and 2 above; or, - -@item -accompany it with a written offer, valid for at least three -years, to give any third party free (except for a nominal charge -for the cost of distribution) a complete machine-readable copy of the -corresponding source code, to be distributed under the terms of -Paragraphs 1 and 2 above; or, - -@item -accompany it with the information you received as to where the -corresponding source code may be obtained. (This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form alone.) -@end itemize - -Source code for a work means the preferred form of the work for making -modifications to it. For an executable file, complete source code means -all the source code for all modules it contains; but, as a special -exception, it need not include source code for modules which are standard -libraries that accompany the operating system on which the executable -file runs, or for standard header files or definitions files that -accompany that operating system. - -@item -You may not copy, modify, sublicense, distribute or transfer the -Program except as expressly provided under this General Public License. -Any attempt otherwise to copy, modify, sublicense, distribute or transfer -the Program is void, and will automatically terminate your rights to use -the Program under this License. However, parties who have received -copies, or rights to use copies, from you under this General Public -License will not have their licenses terminated so long as such parties -remain in full compliance. 
- -@item -By copying, distributing or modifying the Program (or any work based -on the Program) you indicate your acceptance of this license to do so, -and all its terms and conditions. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the original -licensor to copy, distribute or modify the Program subject to these -terms and conditions. You may not impose any further restrictions on the -recipients' exercise of the rights granted herein. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of the license which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -the license, you may choose any version ever published by the Free Software -Foundation. - -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@iftex -@heading NO WARRANTY -@end iftex -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL -ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES -ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT -LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES -SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE -WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN -ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. -@end enumerate - -@iftex -@heading END OF TERMS AND CONDITIONS -@end iftex -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@unnumberedsec Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to humanity, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest to -attach them to the start of each source file to most effectively convey -the exclusion of warranty; and each file should have at least the -``copyright'' line and a pointer to where the full notice is found. 
- -@smallexample -@var{one line to give the program's name and a brief idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 1, or (at your option) -any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. -This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -@end smallexample - -The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and `show -c'; they could even be mouse-clicks or menu items---whatever suits your -program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here a sample; alter the names: - -@example -Yoyodyne, Inc., hereby disclaims all copyright interest in the -program `Gnomovision' (a program to direct compilers to make passes -at assemblers) written by James Hacker. 
- -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end example - -That's all there is to it! - -@node This Manual, Getting Started, License , Top -@chapter Using This Manual -@cindex Manual, using this -@cindex Using this manual -@cindex Language, @code{awk} -@cindex Program, @code{awk} -@cindex @code{awk} language -@cindex @code{awk} program - -The term @code{gawk} refers to a program (a version of @code{awk}) -developed by the Free Software Foundation, and to the language you -use to tell it what to do. When we need to be careful, we call the program -``the @code{awk} utility'' and the language ``the @code{awk} language''. -The purpose of this manual is to explain the @code{awk} language and how to -run the @code{awk} utility. - -The term @dfn{@code{awk} program} refers to a program written by you in -the @code{awk} programming language.@refill - -@xref{Getting Started}, for the bare essentials you need to know to -start using @code{awk}. - -Useful ``one--liners'' are included to give you a feel for the -@code{awk} language (@pxref{One-liners}). - -@ignore -@strong{I deleted four paragraphs here because they would confuse the -beginner more than help him. They mention terms such as ``field'', -``pattern'', ``action'', ``built--in function'' which the beginner -doesn't know.} - -@strong{If you can find a way to introduce several of these concepts here, -enough to give the reader a map of what is to follow, that might -be useful. I'm not sure that can be done without taking up more -space than ought to be used here. 
There may be no way to win.} - -@strong{ADR: I'd like to tackle this in phase 2 of my editing.} -@end ignore - -A sizable sample @code{awk} program has been provided for you (@pxref{Sample -Program}).@refill - -If you find terms that you aren't familiar with, try looking them -up in the glossary (@pxref{Glossary}).@refill - -Most of the time complete @code{awk} programs are used as examples, but in -some of the more advanced sections, only the part of the @code{awk} program -that illustrates the concept being described is shown.@refill - -@menu -This chapter contains the following sections: - -* The Files:: Sample data files for use in the @code{awk} programs - illustrated in this manual. -@end menu - -@node The Files, , , This Manual -@section Input Files for the Examples - -@cindex Input file, sample -@cindex Sample input file -@cindex @file{BBS-list} file -This manual contains many sample programs. The data for many of those -programs comes from two files. The first file, called @file{BBS-list}, -represents a list of computer bulletin board systems and information about -those systems. - -Each line of this file is one @dfn{record}. Each record contains the name -of a computer bulletin board, its phone number, the board's baud rate, and a -code for the number of hours it is operational. An @samp{A} in the last -column means the board operates 24 hours all week. A @samp{B} in the last -column means the board operates only during evening and weekend hours. A @samp{C} -means the board operates only on weekends.
- -@group -@example -aardvark 555-5553 1200/300 B -alpo-net 555-3412 2400/1200/300 A -barfly 555-7685 1200/300 A -bites 555-1675 2400/1200/300 A -camelot 555-0542 300 C -core 555-2912 1200/300 C -fooey 555-1234 2400/1200/300 B -foot 555-6699 1200/300 B -macfoo 555-6480 1200/300 A -sdace 555-3430 2400/1200/300 A -sabafoo 555-2127 1200/300 C -@end example -@end group -The second data file, called @file{inventory-shipped}, represents -information about shipments during the year. Each line of this file is also -one record. Each record contains the month of the year, the number of green -crates shipped, the number of red boxes shipped, the number of orange bags -shipped, and the number of blue packages shipped, respectively. -@cindex @file{inventory-shipped} file - -@group -@example -Jan 13 25 15 115 -Feb 15 32 24 226 -Mar 15 24 34 228 -Apr 31 52 63 420 -May 16 34 29 208 -Jun 31 42 75 492 -Jul 24 34 67 436 -Aug 15 34 47 316 -Sep 13 55 37 277 -Oct 29 54 68 525 -Nov 20 87 82 577 -Dec 17 35 61 401 - -Jan 21 36 64 620 -Feb 26 58 80 652 -Mar 24 75 70 495 -Apr 21 70 74 514 -@end example -@end group - -@ifinfo -If you are reading this in GNU Emacs using Info, you can copy the regions -of text showing these sample files into your own test files. This way you -can try out the examples shown in the remainder of this document. You do -this by using the command @kbd{M-x write-region} to copy text from the Info -file into a file for use with @code{awk} (see your @cite{GNU Emacs Manual} -for more information). Using this information, create your own -@file{BBS-list} and @file{inventory-shipped} files, and practice what you -learn in this manual. 
-@end ifinfo - -@node Getting Started, Reading Files, This Manual, Top -@chapter Getting Started With @code{awk} - -@cindex Script, definition of -@cindex Rule, definition of -@cindex Pattern, definition of -@cindex Action, definition of -@cindex Program, definition of -@cindex Basic function of @code{gawk} -The basic function of @code{awk} is to search files for lines (or other -units of text) that contain certain patterns. When a line matching any -of those patterns is found, @code{awk} performs specified actions on -that line. Then @code{awk} keeps processing input lines until the end -of the file is reached.@refill - -An @code{awk} @dfn{program} or @dfn{script} consists of a series of -@dfn{rules}. (A program may also contain @dfn{function definitions}, but -that is an advanced feature, so let's ignore it for now. -@xref{User-defined}.) - -A rule contains a @dfn{pattern}, an @dfn{action}, or both. Actions are -enclosed in curly braces to distinguish them from patterns. Therefore, -an @code{awk} program is a sequence of rules in the form:@refill -@cindex Action, curly braces -@cindex Curly braces - -@example -@var{pattern} @{ @var{action} @} -@var{pattern} @{ @var{action} @} -@dots{} -@end example - -@menu -* Very Simple:: A very simple example. -* Two Rules:: A less simple one--line example with two rules. -* More Complex:: A more complex example. -* Running gawk:: How to run gawk programs; includes command line syntax. -* Comments:: Adding documentation to gawk programs. -* Statements/Lines:: Subdividing or combining statements into lines. - -* When:: When to use gawk and when to use other things. -@end menu - -@node Very Simple, Two Rules, , Getting Started -@section A Very Simple Example - -@cindex @code{print $0} -The following command runs a simple @code{awk} program that searches the -input file @file{BBS-list} for the string of characters: @samp{foo}. (A -string of characters is usually called, quite simply, a @dfn{string}.)
- -@example -awk '/foo/ @{ print $0 @}' BBS-list -@end example - -@noindent -When lines containing @samp{foo} are found, they are printed, because -@w{@code{print $0}} means print the current line. (Just @code{print} by -itself also means the same thing, so we could have written that -instead.) - -You will notice that slashes, @samp{/}, surround the string @samp{foo} -in the actual @code{awk} program. The slashes indicate that @samp{foo} -is a pattern to search for. This type of pattern is called a -@dfn{regular expression}, and is covered in more detail later -(@pxref{Regexp}). There are single quotes around the @code{awk} program -so that the shell won't interpret any of it as special shell -characters.@refill - -Here is what this program prints: - -@example -fooey 555-1234 2400/1200/300 B -foot 555-6699 1200/300 B -macfoo 555-6480 1200/300 A -sabafoo 555-2127 1200/300 C -@end example - -@cindex Action, default -@cindex Pattern, default -@cindex Default action -@cindex Default pattern -In an @code{awk} rule, either the pattern or the action can be omitted, -but not both. - -If the pattern is omitted, then the action is performed for @emph{every} -input line.@refill - -If the action is omitted, the default action is to print all lines that -match the pattern. We could leave out the action (the print statement -and the curly braces) in the above example, and the result would be the -same: all lines matching the pattern @samp{foo} would be printed. (By -comparison, omitting the print statement but retaining the curly braces -makes an empty action that does nothing; then no lines would be -printed.) - -@node Two Rules, More Complex, Very Simple, Getting Started -@section An Example with Two Rules -@cindex How gawk works - -The @code{awk} utility reads the input files one line at a -time. For each line, @code{awk} tries the patterns of all the rules. -If several patterns match then several actions are run, in the order in -which they appear in the @code{awk} program. 
If no patterns match, then -no actions are run. - -After processing all the rules (perhaps none) that match the line, -@code{awk} reads the next line (however, @pxref{Next}). -This continues until the end of the file is reached.@refill - -For example, the @code{awk} program: - -@example -/12/ @{ print $0 @} -/21/ @{ print $0 @} -@end example - -@noindent -contains two rules. The first rule has the string @samp{12} as the -pattern and @samp{print $0} as the action. The second rule has the -string @samp{21} as the pattern and also has @samp{print $0} as the -action. Each rule's action is enclosed in its own pair of braces. - -This @code{awk} program prints every line that contains the string -@samp{12} @emph{or} the string @samp{21}. If a line contains both -strings, it is printed twice, once by each rule. - -If we run this program on our two sample data files, @file{BBS-list} and -@file{inventory-shipped}, as shown here: - -@example -awk '/12/ @{ print $0 @} - /21/ @{ print $0 @}' BBS-list inventory-shipped -@end example - -@noindent -we get the following output: - -@example -aardvark 555-5553 1200/300 B -alpo-net 555-3412 2400/1200/300 A -barfly 555-7685 1200/300 A -bites 555-1675 2400/1200/300 A -core 555-2912 1200/300 C -fooey 555-1234 2400/1200/300 B -foot 555-6699 1200/300 B -macfoo 555-6480 1200/300 A -sdace 555-3430 2400/1200/300 A -sabafoo 555-2127 1200/300 C -sabafoo 555-2127 1200/300 C -Jan 21 36 64 620 -Apr 21 70 74 514 -@end example - -@noindent -Note how the line in @file{BBS-list} beginning with @samp{sabafoo} -was printed twice, once for each rule. - -@node More Complex, Running gawk, Two Rules, Getting Started -@comment node-name, next, previous, up -@section A More Complex Example - -Here is an example to give you an idea of what typical @code{awk} -programs do. This example shows how @code{awk} can be used to -summarize, select, and rearrange the output of another utility. 
It uses -features that haven't been covered yet, so don't worry if you don't -understand all the details. - -@example -ls -l | awk '$5 == "Nov" @{ sum += $4 @} - END @{ print sum @}' -@end example - -This command prints the total number of bytes in all the files in the -current directory that were last modified in November (of any year). -(In the C shell you would need to type a semicolon and then a backslash -at the end of the first line; in the Bourne shell you can type the example -as shown.) - -The @w{@code{ls -l}} part of this example is a command that gives you a full -listing of all the files in a directory, including file size and date. -Its output looks like this: - -@example --rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile --rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h --rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h --rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y --rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c --rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c --rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c --rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c -@end example - -@noindent -The first field contains read--write permissions, the second field contains -the number of links to the file, and the third field identifies the owner of -the file. The fourth field contains the size of the file in bytes. The -fifth, sixth, and seventh fields contain the month, day, and time, -respectively, that the file was last modified. Finally, the eighth field -contains the name of the file. - -The @samp{$5 == "Nov"} in our @code{awk} program is an expression that -tests whether the fifth field of the output from @w{@code{ls -l}} -matches the string @samp{Nov}. Each time a line has the string -@samp{Nov} in its fifth field, the action @samp{@{ sum += $4 @}} is -performed. This adds the fourth field (the file size) to the variable -@code{sum}. 
As a result, when @code{awk} has finished reading all the -input lines, @code{sum} will be the sum of the sizes of files whose -lines matched the pattern.@refill - -After the last line of output from @code{ls} has been processed, the -@code{END} pattern is executed, and the value of @code{sum} is -printed. In this example, the value of @code{sum} would be 80600.@refill - -These more advanced @code{awk} techniques are covered in later sections -(@pxref{Actions}). Before you can move on to more advanced @code{awk} -programming, you have to know how @code{awk} interprets your input and -displays your output. By manipulating @dfn{fields} and using special -@dfn{print} statements, you can produce some very useful and spectacular -looking reports.@refill - - -@node Running gawk, Comments, More Complex, Getting Started -@section How to Run @code{awk} Programs - -@cindex Command line formats -@cindex Running gawk programs -There are several ways to run an @code{awk} program. If the program is -short, it is easiest to include it in the command that runs @code{awk}, -like this: - -@example -awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} -@end example - -@noindent -where @var{program} consists of a series of @var{patterns} and -@var{actions}, as described earlier. - -When the program is long, you would probably prefer to put it in a file -and run it with a command like this: - -@example -awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{} -@end example - -@menu -* One-shot:: Running a short throw--away @code{awk} program. -* Read Terminal:: Using no input files (input from terminal instead). -* Long:: Putting permanent @code{awk} programs in files. -* Executable Scripts:: Making self--contained @code{awk} programs. -* Command Line:: How the @code{awk} command line is laid out. 
-@end menu - -@node One-shot, Read Terminal, , Running gawk -@subsection One--shot Throw--away @code{awk} Programs - -Once you are familiar with @code{awk}, you will often type simple -programs at the moment you want to use them. Then you can write the -program as the first argument of the @code{awk} command, like this: - -@example -awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} -@end example - -@noindent -where @var{program} consists of a series of @var{patterns} and -@var{actions}, as described earlier. - -@cindex Single quotes, why they are needed -This command format tells the shell to start @code{awk} and use the -@var{program} to process records in the input file(s). There are single -quotes around the @var{program} so that the shell doesn't interpret any -@code{awk} characters as special shell characters. They cause the -shell to treat all of @var{program} as a single argument for -@code{awk}. They also allow @var{program} to be more than one line -long.@refill - -This format is also useful for running short or medium--sized @code{awk} -programs from shell scripts, because it avoids the need for a separate -file for the @code{awk} program. A self--contained shell script is more -reliable since there are no other files to misplace. - -@node Read Terminal, Long, One-shot, Running gawk -@subsection Running @code{awk} without Input Files - -@cindex Standard input -@cindex Input, standard -You can also use @code{awk} without any input files. If you type the -command line:@refill - -@example -awk '@var{program}' -@end example - -@noindent -then @code{awk} applies the @var{program} to the @dfn{standard input}, -which usually means whatever you type on the terminal. This continues -until you indicate end--of--file by typing @kbd{Control-d}. - -For example, if you type: - -@example -awk '/th/' -@end example - -@noindent -whatever you type next will be taken as data for that @code{awk} -program. 
If you go on to type the following data, - -@example -Kathy -Ben -Tom -Beth -Seth -Karen -Thomas -@kbd{Control-d} -@end example - -@noindent -then @code{awk} will print - -@example -Kathy -Beth -Seth -@end example - -@noindent -@cindex Case sensitivity and gawk -@cindex Pattern, case sensitive -as matching the pattern @samp{th}. Notice that it did not recognize -@samp{Thomas} as matching the pattern. The @code{awk} language is -@dfn{case sensitive}, and matches patterns @emph{exactly}.@refill - -@node Long, Executable Scripts, Read Terminal, Running gawk -@subsection Running Long Programs - -@cindex running long programs -@cindex -f option -@cindex program file -@cindex file, @code{awk} program -Sometimes your @code{awk} programs can be very long. In this case it is -more convenient to put the program into a separate file. To tell -@code{awk} to use that file for its program, you type:@refill - -@example -awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{} -@end example - -The @samp{-f} tells the @code{awk} utility to get the @code{awk} program -from the file @var{source-file}. Any file name can be used for -@var{source-file}. For example, you could put the program:@refill - -@example -/th/ -@end example - -@noindent -into the file @file{th-prog}. Then the command: - -@example -awk -f th-prog -@end example - -@noindent -does the same thing as this one: - -@example -awk '/th/' -@end example - -@noindent -which was explained earlier (@pxref{Read Terminal}). Note that you -don't usually need single quotes around the file name that you specify -with @samp{-f}, because most file names don't contain any of the shell's -special characters. - -If you want to identify your @code{awk} program files clearly as such, -you can add the extension @file{.awk} to the filename. This doesn't -affect the execution of the @code{awk} program, but it does make -``housekeeping'' easier. 
- -@node Executable Scripts, Command Line, Long, Running gawk -@c node-name, next, previous, up -@subsection Executable @code{awk} Programs -@cindex Executable Scripts -@cindex Scripts, Executable -@cindex Self contained Programs -@cindex Program, Self contained -@cindex #! - -(The following section assumes that you are already somewhat familiar -with @code{awk}.) - -Once you have learned @code{awk}, you may want to write self--contained -@code{awk} scripts, using the @samp{#!} script mechanism. You can do -this on BSD Unix systems and GNU. - -For example, you could create a text file named @file{hello}, containing -the following (where @samp{BEGIN} is a feature we have not yet -discussed): - -@example -#! /bin/awk -f - -# a sample awk program - -BEGIN @{ print "hello, world" @} -@end example - -@noindent -After making this file executable (with the @code{chmod} command), you -can simply type: - -@example -hello -@end example - -@noindent -at the shell, and the system will arrange to run @code{awk} as if you -had typed: - -@example -awk -f hello -@end example - -@noindent -Self--contained @code{awk} scripts are particularly useful for putting -@code{awk} programs into production on your system, without your users -having to know that they are actually using an @code{awk} program. - -@cindex Shell Scripts -@cindex Scripts, Shell -If your system does not support the @samp{#!} mechanism, you can get a -similar effect using a regular shell script. It would look something -like this: - -@example -: a sample awk program - -awk '@var{program}' "$@@" -@end example - -Using this technique, it is @emph{vital} to enclose the @var{program} in -single quotes to protect it from interpretation by the shell. If you -omit the quotes, only a shell wizard can predict the result. - -The @samp{"$@@"} causes the shell to forward all the command line -arguments to the @code{awk} program, without interpretation. -@c Someday: (See @cite{The Bourne Again Shell}, by ??.) 
- -@c We don't refer to hoarded information. -@c (See -@c @cite{The UNIX Programming Environment} by Brian Kernighan and Rob Pike, -@c Prentice-Hall, 1984, for more information on writing shell programs that -@c use the Unix utilities. The most powerful version of the shell is the -@c Korn shell. A detailed description of the Korn shell can be found in -@c @cite{The KornShell Command and Programming Language} by Morris Bolsky -@c and David Korn, Prentice-Hall, 1989.) - -@node Command Line, , Executable Scripts, Running gawk -@c node-name, next, previous, up -@subsection Details of the @code{awk} Command Line -@cindex Command Line -@cindex Invocation of @code{gawk} -@cindex Arguments, Command Line -@cindex Options, Command Line - -(The following section assumes that you are already familiar with -@code{awk}.) - -There are two ways to run @code{awk}. Here are templates for both of -them; items enclosed in @samp{[} and @samp{]} in these templates are -optional. - -@example -awk [ -F@var{fs} ] [ -- ] '@var{program}' @var{file} @dots{} -awk [ -F@var{fs} ] -f @var{source-file} [ -f @var{source-file} @dots{} ] [ -- ] @var{file} @dots{} -@end example - -Options begin with a minus sign, and consist of a single character. -The options and their meanings are as follows: - -@table @code -@item -F@var{fs} -This sets the @code{FS} variable to @var{fs} (@pxref{Special}). -As a special case, if @var{fs} is @samp{t}, then @code{FS} will be set -to the tab character (@code{"\t"}). - -@item -f @var{source-file} -Indicates that the @code{awk} program is to be found in @var{source-file} -instead of in the first non--option argument. - -@item -- -This signals the end of the command line options. If you wish to -specify an input file named @file{-f}, you can precede it with the -@samp{--} argument to prevent the @file{-f} from being interpreted as an -option. This handling of @samp{--} follows the POSIX argument parsing -conventions. 
-@end table - -Any other options will be flagged as invalid with a warning message, but -are otherwise ignored. - -If the @samp{-f} option is @emph{not} used, then the first non--option -command line argument is expected to be the program text. - -The @samp{-f} option may be used more than once on the command line. -@code{awk} will read its program source from all of the named files, as -if they had been concatenated together into one big file. This is useful -for creating libraries of @code{awk} functions. Useful functions can be -written once, and then retrieved from a standard place, instead of having -to be included into each individual program. You can still type in a program -at the terminal and use library functions, by specifying @file{/dev/tty} -as one of the arguments to a @samp{-f}. Type your program, and end it -with the keyboard end--of--file character @kbd{Control-d}. - -Any additional arguments on the command line are made available to your -@code{awk} program in the @code{ARGV} array (@pxref{Special}). These -arguments are normally treated as input files to be processed in the -order specified. However, an argument that has the form -@var{var}@code{=}@var{value}, means to assign the value @var{value} to -the variable @var{var}---it does not specify a file at all. - -@vindex ARGV -Command line options and the program text (if present) are omitted from -the @code{ARGV} array. All other arguments, including variable assignments, -are included (@pxref{Special}). - -The distinction between file name arguments and variable--assignment -arguments is made when @code{awk} is about to open the next input file. -At that point in execution, it checks the ``file name'' to see whether -it is really a variable assignment; if so, instead of trying to read a -file it will, @emph{at that point in the execution}, assign the -variable. - -Therefore, the variables actually receive the specified values after all -previously specified files have been read. 
In particular, the values of -variables assigned in this fashion are @emph{not} available inside a -@code{BEGIN} rule (@pxref{BEGIN/END}), since such rules are run before -@code{awk} begins scanning the argument list.@refill - -@vindex OFS -@vindex ORS -@vindex RS -The variable assignment feature is most useful for assigning to variables -such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and -output formats, before listing the data files. It is also useful for -controlling state if multiple passes are needed over a data file. For -example:@refill - -@cindex Multiple passes over data -@cindex Passes, Multiple -@example -awk 'pass == 1 @{ @var{pass 1 stuff} @} - pass == 2 @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile -@end example - -@node Comments, Statements/Lines, Running gawk, Getting Started -@section Comments in @code{awk} Programs -@cindex Comments -@cindex Use of comments -@cindex Documenting @code{awk} programs -@cindex Programs, documenting - -When you write a complicated @code{awk} program, you can put @dfn{comments} -in the program file to help you remember what the program does, and how it -works. - -A comment starts with the sharp sign character, @kbd{#}, and continues -to the end of the line. The @code{awk} language ignores the rest of a line -following a sharp sign. For example, we could have put the following into -@file{th-prog}:@refill - -@example -# This program finds records containing the pattern @samp{th}. This is how -# you continue comments on additional lines. -/th/ -@end example - -You can put comment lines into keyboard--composed throw--away @code{awk} -programs also, but this usually isn't very useful; the purpose of a -comment is to help yourself or another person understand the program at -another time. 
- -@node Statements/Lines, When, Comments, Getting Started -@section @code{awk} Statements versus Lines - -Most often, each line in an @code{awk} program is a separate statement or -separate rule, like this: - -@example -awk '/12/ @{ print $0 @} - /21/ @{ print $0 @}' BBS-list inventory-shipped -@end example - -But sometimes statements can be more than one line, and lines can contain -several statements. - -You can split a statement into multiple lines by inserting a newline after -any of the following: - -@example -, @{ ? : || && -@end example - -@noindent -Lines ending in @code{do} or @code{else} automatically have their -statements continued on the following line(s). A newline at any other -point ends the statement.@refill - -@cindex Backslash Continuation -@cindex Continuing statements on the next line -If you would like to split a single statement into two lines at a point -where a newline would terminate it, you can @dfn{continue} it by ending the -first line with a backslash character, @samp{\}. This is allowed -absolutely anywhere in the statement, even in the middle of a string or -regular expression. For example: - -@example -awk '/This program is too long, so continue it\ - on the next line/ @{ print $1 @}' -@end example - -@noindent -We have generally not used backslash continuation in the sample programs in -this manual. Since there is no limit on the length of a line, it is never -strictly necessary; it just makes programs prettier. We have preferred to -make them even more pretty by keeping the statements short. Backslash -continuation is most useful when your @code{awk} program is in a separate -source file, instead of typed in on the command line. - -@strong{Warning: this does not work if you are using the C shell.} -Continuation with backslash works for @code{awk} programs in files, and -also for one--shot programs @emph{provided} you are using the Bourne -shell, the Korn shell, or the Bourne--again shell. 
But the C shell used -on Berkeley Unix behaves differently! There, you must use two backslashes -in a row, followed by a newline.@refill - -@cindex Multiple statements on one line -When @code{awk} statements within one rule are short, you might want to put -more than one of them on a line. You do this by separating the statements -with semicolons, @samp{;}. -This also applies to the rules themselves. -Thus, the above example program could have been written:@refill - -@example -/12/ @{ print $0 @} ; /21/ @{ print $0 @} -@end example - -@noindent -@emph{Note:} It is a new requirement that rules on the same line require -semicolons as a separator in the @code{awk} language; it was done for -consistency with the statements in the action part of rules. - -@node When, , Statements/Lines, Getting Started -@section When to Use @code{awk} - -@cindex When to use @code{awk} -@cindex Applications of @code{awk} -What use is all of this to me, you might ask? Using additional operating -system utilities, more advanced patterns, field separators, arithmetic -statements, and other selection criteria, you can produce much more complex -output. The @code{awk} language is very useful for producing reports from -large amounts of raw data, like summarizing information from the output of -standard operating system programs such as @code{ls}. (@xref{More -Complex, , A More Complex Example}.) - -Programs written with @code{awk} are usually much smaller than they would -be in other languages. This makes @code{awk} programs easy to compose and -use. Often @code{awk} programs can be quickly composed at your terminal, -used once, and thrown away. Since @code{awk} programs are interpreted, you -can avoid the usually lengthy edit--compile--test--debug cycle of software -development. 
- -@cindex Emacs Lisp -Complex programs have been written in @code{awk}, including a complete -retargetable assembler for 8--bit microprocessors (@pxref{Glossary} for -more information) and a microcode assembler for a special purpose Prolog -computer. However, @code{awk}'s capabilities are strained by tasks of -such complexity. - -If you find yourself writing @code{awk} scripts of more than, say, a few -hundred lines, you might consider using a different programming -language. Emacs Lisp is a good choice if you need sophisticated string -or pattern matching capabilities. The shell is also good at string and -pattern matching; in addition it allows powerful use of the standard -utilities. More conventional languages like C, C++, or Lisp offer -better facilities for system programming and for managing the complexity -of large programs. Programs in these languages may require more lines -of source code than the equivalent @code{awk} programs, but they will be -easier to maintain and usually run more efficiently.@refill - -@node Reading Files, Printing, Getting Started, Top -@chapter Reading Files (Input) - -@cindex Reading files, general -@cindex Input, general -@cindex Standard input -@cindex Input, standard -@cindex General input -@vindex FILENAME -In the typical @code{awk} program, all input is read either from the -standard input (usually the keyboard) or from files whose names you -specify on the @code{awk} command line. If you specify input files, -@code{awk} reads data from the first one until it reaches the end; then -it reads the second file until it reaches the end, and so on. The name -of the current input file can be found in the special variable -@code{FILENAME} (@pxref{Special}).@refill - -The input is split automatically into @dfn{records}, and processed by -the rules one record at a time. (Records are the units of text -mentioned in the introduction; by default, a record is a line of text.) 
-Each record read is split automatically into @dfn{fields}, to make it -more convenient for a rule to work on parts of the record under -consideration. - -On rare occasions you will need to use the @code{getline} command, -which can do explicit input from any number of files. - -@menu -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Field Separators:: The field separator and how to change it. -* Multiple:: Reading multi--line records. - -* Assignment Options:: Setting variables on the command line and a summary - of command line syntax. This is an advanced method - of input. - -* Getline:: Reading files under explicit program control - using the @code{getline} function. -* Close Input:: Closing an input file (so you can read from - the beginning once more). -@end menu - -@node Records, Fields, , Reading Files -@section How Input is Split into Records - -@cindex Record separator, @code{RS} -The @code{awk} language divides its input into records and fields. -Records are separated from each other by the @dfn{record separator}. By -default, the record separator is the @dfn{newline} character. -Therefore, normally, a record is a line of text.@refill - -@cindex Changing the record separator -@vindex RS -Sometimes you may want to use a different character to separate your -records. You can use different characters by changing the special -variable @code{RS}. - -The value of @code{RS} is a string that says how to separate records; -the default value is @code{"\n"}, the string of just a newline -character. This is why lines of text are the default record. Although -@code{RS} can have any string as its value, only the first character of -the string will be used as the record separator. The other characters -are ignored. @code{RS} is exceptional in this regard; @code{awk} uses -the full value of all its other special variables.@refill - -@ignore -Someday this should be true! 
- -The value of @code{RS} is not limited to a one--character string. It can -be any regular expression (@pxref{Regexp}). In general, each record -ends at the next string that matches the regular expression; the next -record starts at the end of the matching string. This general rule is -actually at work in the usual case, where @code{RS} contains just a -newline: a record ends at the beginning of the next matching string (the -next newline in the input) and the following record starts just after -the end of this string (at the first character of the following line). -The newline, since it matches @code{RS}, is not part of either record. -@end ignore - -The value of @code{RS} is changed by @dfn{assigning} it a new value -(@pxref{Assignment Ops}). -One way to do this is at the beginning of your @code{awk} program, -before any input has been processed, using the special @code{BEGIN} -pattern (@pxref{BEGIN/END}). This way, @code{RS} is changed to its new -value before any input is read. The new value of @code{RS} is enclosed -in quotation marks. For example:@refill - -@example -awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list -@end example - -@noindent -changes the value of @code{RS} to @samp{/}, the slash character, before -reading any input. Records are now separated by a slash. The second -rule in the @code{awk} program (the action with no pattern) will proceed -to print each record. Since each @code{print} statement adds a newline -at the end of its output, the effect of this @code{awk} program is to -copy the input with each slash changed to a newline. - -Another way to change the record separator is on the command line, -using the variable--assignment feature (@pxref{Command Line}). - -@example -awk '@dots{}' RS="/" @var{source-file} -@end example - -@noindent -@code{RS} will be set to @samp{/} before processing @var{source-file}. 
- -The empty string (a string of no characters) has a special meaning -as the value of @code{RS}: it means that records are separated only -by blank lines. @xref{Multiple}, for more details. - -@cindex Number of records, @code{NR} -@cindex Number of records, @code{FNR} -@vindex NR -@vindex FNR -The @code{awk} utility keeps track of the number of records that have -been read so far from the current input file. This value is stored in a -special variable called @code{FNR}. It is reset to zero when a new file -is started. Another variable, @code{NR}, is the total number of input -records read so far from all files. It starts at zero but is never -automatically reset to zero. - -If you change the value of @code{RS} in the middle of an @code{awk} run, -the new value is used to delimit subsequent records, but the record -currently being processed (and records already finished) are not -affected. - -@node Fields, Non-Constant Fields, Records, Reading Files -@section Examining Fields - -@cindex Examining fields -@cindex Fields -@cindex Accessing fields -When @code{awk} reads an input record, the record is -automatically separated or @dfn{parsed} by the interpreter into pieces -called @dfn{fields}. By default, fields are separated by whitespace, -like words in a line. -Whitespace in @code{awk} means any string of one or more spaces and/or -tabs; other characters such as newline, formfeed, and so on, that are -considered whitespace by other languages are @emph{not} considered -whitespace by @code{awk}. - -The purpose of fields is to make it more convenient for you to refer to -these pieces of the record. You don't have to use them---you can -operate on the whole record if you wish---but fields are what make -simple @code{awk} programs so powerful. - -@cindex @code{$} (field operator) -@cindex Operators, @code{$} -To refer to a field in an @code{awk} program, you use a dollar--sign, -@samp{$}, followed by the number of the field you want. 
Thus, @code{$1} -refers to the first field, @code{$2} to the second, and so on. For -example, suppose the following is a line of input:@refill - -@example -This seems like a pretty nice example. -@end example - -@noindent -Here the first field, or @code{$1}, is @samp{This}; the second field, or -@code{$2}, is @samp{seems}; and so on. Note that the last field, -@code{$7}, is @samp{example.}. Because there is no space between the -@samp{e} and the @samp{.}, the period is considered part of the seventh -field.@refill - -@cindex @code{$NF}, last field in record -No matter how many fields there are, the last field in a record can be -represented by @code{$NF}. So, in the example above, @code{$NF} would -be the same as @code{$7}, which is @samp{example.}. Why this works is -explained below (@pxref{Non-Constant Fields}). If you try to refer to a -field beyond the last one, such as @code{$8} when the record has only 7 -fields, you get the empty string. - -@vindex NF -@cindex Number of fields, @code{NF} -Plain @code{NF}, with no @samp{$}, is a special variable whose value -is the number of fields in the current record. - -@code{$0}, which looks like an attempt to refer to the zeroth field, is -a special case: it represents the whole input record. This is what you -would use when you aren't interested in fields. - -Here are some more examples: - -@example -awk '$1 ~ /foo/ @{ print $0 @}' BBS-list -@end example - -@noindent -This example contains the @dfn{matching} operator @code{~} -(@pxref{Comparison Ops}). 
Using this operator, all records in the file -@file{BBS-list} whose first field contains the string @samp{foo} are -printed.@refill - -By contrast, the following example: - -@example -awk '/foo/ @{ print $1, $NF @}' BBS-list -@end example - -@noindent -looks for the string @samp{foo} in @emph{the entire record} and prints -the first field and the last field for each input record containing the -pattern.@refill - -The following program will search the system password file, and print -the entries for users who have no password. - -@example -awk -F: '$2 == ""' /etc/passwd -@end example - -@noindent -This program uses the @samp{-F} option on the command line to set the -field separator. (Fields in @file{/etc/passwd} are separated by colons. -The second field represents a user's encrypted password, but if the -field is empty, that user has no password.) - -@node Non-Constant Fields, Changing Fields, Fields, Reading Files -@section Non-constant Field Numbers - -The number of a field does not need to be a constant. Any expression in -the @code{awk} language can be used after a @samp{$} to refer to a -field. The @code{awk} utility evaluates the expression and uses the -@dfn{numeric value} as a field number. Consider this example:@refill - -@example -awk '@{ print $NR @}' -@end example - -@noindent -Recall that @code{NR} is the number of records read so far: 1 in the -first record, 2 in the second, etc. So this example will print the -first field of the first record, the second field of the second record, -and so on. For the twentieth record, field number 20 will be printed; -most likely this will make a blank line, because the record will not -have 20 fields. - -Here is another example of using expressions as field numbers: - -@example -awk '@{ print $(2*2) @}' BBS-list -@end example - -The @code{awk} language must evaluate the expression @samp{(2*2)} and use -its value as the field number to print. 
The @samp{*} sign represents -multiplication, so the expression @samp{2*2} evaluates to 4. This example, -then, prints the hours of operation (the fourth field) for every line of the -file @file{BBS-list}.@refill - -@cindex Fields, negative-numbered -@cindex Negative-numbered fields -When you use non--constant field numbers, you may ask for a field -with a negative number. This always results in an empty string, just -like a field whose number is too large for the input record. For -example, @samp{$(1-4)} would try to examine field number -3; it would -result in an empty string. - -If the field number you compute is zero, you get the entire record. - -The number of fields in the current record is stored in the special variable -@code{NF} (@pxref{Special}). The expression @samp{$NF} is not a special -feature: it is the direct consequence of evaluating @code{NF} and using -its value as a field number. - -@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files -@section Changing the Contents of a Field - -@cindex Field, changing contents of -@cindex Changing contents of a field -You can change the contents of a field as seen by @code{awk} within an -@code{awk} program; this changes what @code{awk} perceives as the -current input record. (The actual input is untouched: @code{awk} never -modifies the input file.) - -Look at this example: - -@example -awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped -@end example - -@noindent -The @samp{-} sign represents subtraction, so this program reassigns -field three, @code{$3}, to be the value of field two minus ten, -@samp{@code{$2} - 10}. (@xref{Arithmetic Ops}.) Then field two, and the -new value for field three, are printed. - -In order for this to work, the text in field @code{$2} must make sense -as a number; the string of characters must be converted to a number in -order for the computer to do arithmetic on it. 
The number resulting -from the subtraction is converted back to a string of characters which -then becomes field 3. @xref{Conversion}. - -When you change the value of a field (as perceived by @code{awk}), the -text of the input record is recalculated to contain the new field where -the old one was. @code{$0} will from that time on reflect the altered -field. Thus, - -@example -awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped -@end example - -@noindent -will print a copy of the input file, with 10 subtracted from the second -field of each line. - -You can also assign contents to fields that are out of range. For -example: - -@example -awk '@{ $6 = ($5 + $4 + $3 + $2)/4 ; print $6 @}' inventory-shipped -@end example - -@noindent -We've just created @code{$6}, whose value is the average of fields -@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign represents -addition, and the @samp{/} sign represents division. For the file -@file{inventory-shipped} @code{$6} represents the average number of parcels -shipped for a particular month. - -Creating a new field changes what @code{awk} interprets as the current -input record. The value of @code{$0} will be recomputed. This -recomputation affects and is affected by features not yet discussed, in -particular, the @dfn{Output Field Separator}, @code{OFS}, which is used -to separate the fields (@pxref{Output Separators}), and @code{NF} (the -number of fields; @pxref{Fields}). For example, the value of @code{NF} -will be set to the number of the highest out--of--range field you -create.@refill - -Note, however, that merely @emph{referencing} an out--of--range field -will @emph{not} change the value of either @code{$0} or @code{NF}. -Referencing an out--of--range field merely produces a null string. For -example:@refill - -@example -if ($(NF+1) != "") - print "can't happen" -else - print "everything is normal" -@end example - -@noindent -should print @samp{everything is normal}. 
(@xref{If}, for more -information about @code{awk}'s @samp{if-else} statements.) - -@node Field Separators, Multiple, Changing Fields, Reading Files -@section Specifying How Fields Are Separated - -@vindex FS -@cindex Fields, semantics of -@cindex Fields, separating -@cindex Field separator, @code{FS} -You can change the way @code{awk} splits a record into fields by changing the -value of the @dfn{field separator}. The field separator is represented by -the special variable @code{FS} in an @code{awk} program, and can be set -by @samp{-F} on the command line. The @code{awk} language scans each input -line for the field separator character to determine the positions of fields -within that line. Shell programmers take note! @code{awk} uses the variable -@code{FS}, not @code{IFS}.@refill - -The default value of the field separator is a string containing a single -space. This value is actually a special case; as you know, by default, fields -are separated by whitespace sequences, not by single spaces: two spaces -in a row do not delimit an empty field. ``Whitespace'' is defined as sequences -of one or more spaces or tab characters. - -You change the value of @code{FS} by @dfn{assigning} it a new value. You -can do this using the special @code{BEGIN} pattern (@pxref{BEGIN/END}). -This pattern allows you to change the value of @code{FS} before any input is -read. The new value of @code{FS} is enclosed in quotations. For example, -set the value of @code{FS} to the string @samp{","}: - -@example -awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}' -@end example - -@noindent -and use the input line:@refill - -@example -John Q. Smith, 29 Oak St., Walamazoo, MI 42139 -@end example - -@noindent -This @code{awk} program will extract the string @samp{29 Oak St.}. 
- -@cindex Separator character, choice of -@cindex Field separator, choice of -@cindex Regular expressions, field separators and -Sometimes your input data will contain separator characters that don't -separate fields the way you thought they would. For instance, the person's -name in the example we've been using might have a title or suffix attached, -such as @samp{John Q. Smith, LXIX}. If you assigned @code{FS} to be -@samp{,} then: - -@example -awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}' -@end example - -@noindent -would extract @samp{LXIX}, instead of @samp{29 Oak St.}. If you were -expecting the program to print the address, you would be surprised. So, -choose your data layout and separator characters carefully to prevent -problems like this from happening.@refill - -You can assign @code{FS} to be a series of characters. For example, the -assignment:@refill - -@example -FS = ", \t" -@end example - -@noindent -makes every area of an input line that consists of a comma followed by a -space and a tab, into a field separator. (@samp{\t} stands for a -tab.)@refill - -If @code{FS} is any single character other than a blank, then that character -is used as the field separator, and two successive occurrences of that -character do delimit an empty field. - -If you assign @code{FS} to a string longer than one character, that string -is evaluated as a @dfn{regular expression} (@pxref{Regexp}). The value of -the regular expression is used as a field separator. - -@cindex Field separator, setting on command line -@cindex Command line, setting @code{FS} on -@code{FS} can be set on the command line. You use the @samp{-F} argument to -do so. For example: - -@example -awk -F, '@var{program}' @var{input-files} -@end example - -@noindent -sets @code{FS} to be the @samp{,} character. Notice that the argument uses -a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file -containing an @code{awk} program. 
Case is significant in command options: -the @samp{-F} and @samp{-f} options have nothing to do with each other. -You can use both options at the same time to set the @code{FS} argument -@emph{and} get an @code{awk} program from a file. - -As a special case, if the argument to @samp{-F} is @samp{t}, then @code{FS} -is set to the tab character. (This is because if you type @samp{-F\t}, -without the quotes, at the shell, the @samp{\} gets deleted, so @code{awk} -figures that you really want your fields to be separated with tabs, and -not @samp{t}s. Use @code{FS="t"} if you really do want to separate your -fields with @samp{t}s.) - -For example, let's use an @code{awk} program file called @file{baud.awk} -that contains the pattern @samp{/300/}, and the action @samp{print $1}. -We'll use the operating system utility @code{cat} to ``look'' at our -program:@refill - -@example -% cat baud.awk -/300/ @{ print $1 @} -@end example - -Let's also set @code{FS} to be the @samp{-} character. We will apply -all this information to the file @file{BBS-list}. This @code{awk} program -will now print a list of the names of the bulletin boards that operate at -300 baud and the first three digits of their phone numbers.@refill - -@example -awk -F- -f baud.awk BBS-list -@end example - -@noindent -produces this output: - -@example -aardvark 555 -alpo -barfly 555 -bites 555 -camelot 555 -core 555 -fooey 555 -foot 555 -macfoo 555 -sdace 555 -sabafoo 555 -@end example - -@noindent -Note the second line of output. If you check the original file, you will -see that the second line looked like this: - -@example -alpo-net 555-3412 2400/1200/300 A -@end example - -The @samp{-} as part of the system's name was used as the field -separator, instead of the @samp{-} in the phone number that was -originally intended. This demonstrates why you have to be careful in -choosing your field and record separators. 
- -@node Multiple, Assignment Options, Field Separators, Reading Files -@section Multiple--Line Records - -@cindex Multiple line records -@cindex Input, multiple line records -@cindex Reading files, multiple line records -@cindex Records, multiple line -In some data bases, a single line cannot conveniently hold all the information -in one entry. Then you will want to use multi--line records. - -The first step in doing this is to choose your data format: when records -are not defined as single lines, how will you want to define them? -What should separate records? - -One technique is to use an unusual character or string to separate -records. For example, you could use the formfeed character (written -@samp{\f} in @code{awk}, as in C) to separate them, making each record -a page of the file. To do this, just set the variable @code{RS} to -@code{"\f"} (a string containing the formfeed character), or whatever -string you prefer to use. - -@ignore -Another technique is to have blank lines separate records. The string -@code{"^\n+"} is a regular expression that matches any sequence of -newlines starting at the beginning of a line---in other words, it -matches a sequence of blank lines. If you set @code{RS} to this string, -a record will always end at the first blank line encountered. In -addition, a regular expression always matches the longest possible -sequence when there is a choice. So the next record won't start until -the first nonblank line that follows---no matter how many blank lines -appear in a row, they will be considered one record--separator. -@end ignore - -Another technique is to have blank lines separate records. -By a special dispensation, a null string as the value of @code{RS} -indicates that records are separated by one or more blank lines. -If you set @code{RS} to the null string, -a record will always end at the first blank line encountered. 
-And the next record won't start until -the first nonblank line that follows---no matter how many blank lines -appear in a row, they will be considered one record--separator.@refill - -The second step is to separate the fields in the record. One way to -do this is to put each field on a separate line: to do this, just set -the variable @code{FS} to the string @code{"\n"}. (This -simple regular expression matches a single newline.) Another idea is to -divide each of the lines into fields in the normal manner; the regular -expression @w{@code{"[ \t\n]+"}} will do this nicely by treating the newlines -inside the record just like spaces.@refill - -When @code{RS} is set to the null string, the newline character @emph{always} -acts as a field separator. This is in addition to whatever value @code{FS} -has. The probable reason for this rule is so that you get rational -behavior in the default case (i.e. @w{@code{FS == " "}}). This can be -a problem if you really don't want the newline character to separate -fields, since there is no way to do that. However, you can work around this -by using the @code{split} function to manually break up your data -(@pxref{String Functions}). - -@ignore -Here are two ways to use records separated by blank lines and break each -line into fields normally: - -@example -awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} @{ print $0 @}' BBS-list - -@exdent @r{or} - -awk 'BEGIN @{ RS = "^\n+"; FS = "[ \t\n]+" @} @{ print $0 @}' BBS-list -@end example -@end ignore - -Here is how to use records separated by blank lines and break each -line into fields normally: - -@example -awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} ; @{ print $0 @}' BBS-list -@end example - -@node Assignment Options, Getline, Multiple, Reading Files -@section Assigning Variables on the Command Line - -You can include variable @dfn{assignments} among the file names on the -command line used to invoke @code{awk} (@pxref{Command Line}). 
Such -assignments have the form: - -@example -@var{variable}=@var{text} -@end example - -@noindent -and allow you to change variables either at the beginning of the -@code{awk} run or in between input files. The variable assignment is -performed at a time determined by its position among the input file -arguments: after the processing of the preceding input file argument. -For example: - -@example -awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list -@end example - -@noindent -prints the value of field number @code{n} for all input records. Before -the first file is read, the command line sets the variable @code{n} -equal to 4. This causes the fourth field of the file -@file{inventory-shipped} to be printed. After the first file has -finished, but before the second file is started, @code{n} is set to 2, -so that the second field of the file @file{BBS-list} will be printed. - -Command line arguments are made available for explicit examination by -the @code{awk} program in an array named @code{ARGV} (@pxref{Special}). - -@node Getline, , Assignment Options, Reading Files -@section Explicit Input with @code{getline} - -@findex getline -@cindex Input, @code{getline} function -@cindex Reading files, @code{getline} function -So far we have been getting our input files from @code{awk}'s main -input stream---either the standard input (usually your terminal) or the -files specified on the command line. The @code{awk} language has a -special built--in function called @code{getline} that -can be used to read input under your explicit control. - -This command is quite complex and should @emph{not} be used by -beginners. The command (and its variations) is covered here because -this is the section about input. The examples that follow the -explanation of the @code{getline} command include material that has not -been covered yet. 
Therefore, come back and attempt the @code{getline} -command @emph{after} you have reviewed the rest of this manual and have -a good knowledge of how @code{awk} works. - -When retrieving input, @code{getline} returns a 1 if it found a record, and -a 0 if the end of the file was encountered. If there was some error in -getting a record, such as a file that could not be opened, then @code{getline} -returns a -1. - -In the following examples, @var{command} stands for a string value that -represents a shell command. - -@table @code -@item getline -The @code{getline} function can be used by itself, in an @code{awk} -program, to read input from the current input. All it does in this -case is read the next input record and split it up into fields. This -is useful if you've finished processing the current record, but you -want to do some special processing @emph{right now} on the next -record. Here's an example:@refill - -@example -awk '@{ - if (t = index($0, "/*")) @{ - if(t > 1) - tmp = substr($0, 1, t - 1) - else - tmp = "" - u = index(substr($0, t + 2), "*/") - while (! u) @{ - getline - t = -1 - u = index($0, "*/") - @} - if(u <= length($0) - 2) - $0 = tmp substr($0, t + u + 3) - else - $0 = tmp - @} - print $0 -@}' -@end example - -This @code{awk} program deletes all comments, @samp{/* @dots{} -*/}, from the input. By replacing the @samp{print $0} with other -statements, you could perform more complicated processing on the -de--commented input, such as search it for matches for a regular -expression. - -This form of the @code{getline} command sets @code{NF} (the number of -fields; @pxref{Fields}), @code{NR} (the number of records read so far), the -@code{FNR} variable (@pxref{Records}), and the value of @code{$0}. - -@emph{Note:} The new value of @code{$0} will be used in testing -the patterns of any subsequent rules. The original value -of @code{$0} that triggered the rule which executed @code{getline} -is lost. 
By contrast, the @code{next} statement reads a new record -but immediately begins processing it normally, starting with the first -rule in the program. @xref{Next}. - -@item getline @var{var} -This form of @code{getline} reads a record into the variable @var{var}. -This is useful when you want your program to read the next record from the -input file, but you don't want to subject the record to the normal input -processing. - -For example, suppose the next line is a comment, or a special string, -and you want to read it, but you must make certain that it won't -accidentally trigger any rules. This version of @code{getline} will -allow you to read that line and store it in a variable so that the main -read--a--line--and--check--each--rule loop of @code{awk} never sees it. - -The following example swaps every two lines of input. For example, given: - -@example -wan -tew -free -phore -@end example - -@noindent -it outputs: - -@example -tew -wan -phore -free -@end example - -@noindent -Here's the program: - -@example -awk '@{ - if ((getline tmp) > 0) @{ - print tmp - print $0 - @} else - print $0 -@}' -@end example - -The @code{getline} function used in this way sets only @code{NR} and -@code{FNR} (and of course, @var{var}). The record is not split into fields, -so the values of the fields (including @code{$0}) and the value of @code{NF} -do not change.@refill - -@item getline < @var{file} -This form of the @code{getline} function takes its input from the file -@var{file}. Here @var{file} is a string--valued expression that -specifies the file name. - -This form is useful if you want to read your input from a particular -file, instead of from the main input stream. 
For example, the following -program reads its input record from the file @file{foo.input} when it -encounters a first field with a value equal to 10 in the current input -file.@refill - -@example -awk '@{ -if ($1 == 10) @{ - getline < "foo.input" - print -@} else - print -@}' -@end example - -Since the main input stream is not used, the values of @code{NR} and -@code{FNR} are not changed. But the record read is split into fields in -the normal manner, so the values of @code{$0} and other fields are -changed. So is the value of @code{NF}. - -This does not cause the record to be tested against all the patterns -in the @code{awk} program, in the way that would happen if the record -were read normally by the main processing loop of @code{awk}. However -the new record is tested against any subsequent rules, just as when -@code{getline} is used without a redirection. - -@item getline @var{var} < @var{file} -This form of the @code{getline} function takes its input from the file -@var{file} and puts it in the variable @var{var}. As above, @var{file} -is a string--valued expression that specifies the file to read from. - -In this version of @code{getline}, none of the built--in variables are -changed, and the record is not split into fields. The only variable -changed is @var{var}. - -For example, the following program copies all the input files to the -output, except for records that say @w{@code{@@include @var{filename}}}. -Such a record is replaced by the contents of the file -@var{filename}.@refill - -@example -awk '@{ - if (NF == 2 && $1 == "@@include") @{ - while ((getline line < $2) > 0) - print line - close($2) - @} else - print -@}' -@end example - -Note here how the name of the extra input file is not built into -the program; it is taken from the data, from the second field on -the @samp{@@include} line. - -The @code{close} command is used to ensure that if two identical -@samp{@@include} lines appear in the input, the entire specified file is -included twice. 
@xref{Close Input}. - -One deficiency of this program is that it does not process nested -@samp{@@include} statements the way a true macro preprocessor would. - -@item @var{command} | getline -You can @dfn{pipe} the output of a command into @code{getline}. A pipe is -simply a way to link the output of one program to the input of another. In -this case, the string @var{command} is run as a shell command and its output -is piped into @code{awk} to be used as input. This form of @code{getline} -reads one record from the pipe. - -For example, the following program copies input to output, except for lines -that begin with @samp{@@execute}, which are replaced by the output produced by -running the rest of the line as a shell command: - -@example -awk '@{ - if ($1 == "@@execute") @{ - tmp = substr($0, 10) - while ((tmp | getline) > 0) - print - close(tmp) - @} else - print -@}' -@end example - -@noindent -The @code{close} command is used to ensure that if two identical -@samp{@@execute} lines appear in the input, the command is run again -for each one. @xref{Close Input}. - -Given the input: - -@example -foo -bar -baz -@@execute who -bletch -@end example - -@noindent -the program might produce: - -@example -foo -bar -baz -hack ttyv0 Jul 13 14:22 -hack ttyp0 Jul 13 14:23 (gnu:0) -hack ttyp1 Jul 13 14:23 (gnu:0) -hack ttyp2 Jul 13 14:23 (gnu:0) -hack ttyp3 Jul 13 14:23 (gnu:0) -bletch -@end example - -@noindent -Notice that this program ran the command @code{who} and printed the result. -(If you try this program yourself, you will get different results, showing -you logged in.) - -This variation of @code{getline} splits the record into fields, sets the -value of @code{NF} and recomputes the value of @code{$0}. The values of -@code{NR} and @code{FNR} are not changed. - -@item @var{command} | getline @var{var} -The output of the command @var{command} is sent through a pipe to -@code{getline} and into the variable @var{var}. 
For example, the -following program reads the current date and time into the variable -@code{current_time}, using the utility called @code{date}, and then -prints it.@refill - -@group -@example -awk 'BEGIN @{ - "date" | getline current_time - close("date") - print "Report printed on " current_time -@}' -@end example -@end group - -In this version of @code{getline}, none of the built--in variables are -changed, and the record is not split into fields. -@end table - -@node Close Input, , , Getline -@subsection Closing Input Files -@cindex @code{close} statement for input - -If the same file name or the same shell command is used with -@code{getline} more than once during the execution of the @code{awk} -program, the file is opened (or the command is executed) only the first time. -At that time, the first record of input is read from that file or command. -The next time the same file or command is used in @code{getline}, another -record is read from it, and so on. - -What this implies is that if you want to start reading the same file -again from the beginning, or if you want to rerun a shell command -(rather than reading more output from the command), you must take -special steps. What you can do is use the @code{close} statement: - -@example -close (@var{filename}) -@end example - -@noindent -This statement closes a file or pipe, represented here by -@var{filename}. The string value of @var{filename} must be the same -value as the string used to open the file or pipe to begin with. - -Once this statement is executed, the next @code{getline} from that file -or command will reopen the file or rerun the command. - -@node Printing, One-liners, Reading Files, Top -@chapter Printing Output - -@cindex Printing, general -@cindex Output -One of the most common things that actions do is to output or @dfn{print} -some or all of the input. For simple output, use the @code{print} -statement. For fancier formatting use the @code{printf} statement. 
-Both are described in this chapter. - -@menu -* Print:: The @code{print} statement. -* Print Examples:: Simple examples of @code{print} statements. -* Output Separators:: The output separators and how to change them. - -* Redirection:: How to redirect output to multiple files and pipes. -* Close Output:: How to close output files and pipes. - -* Printf:: The @code{printf} statement. -@end menu - -@node Print, Print Examples, , Printing -@section The @code{print} Statement -@cindex @code{print} statement - -The @code{print} statement does output with simple, standardized -formatting. You specify only the strings or numbers to be printed, in a -list separated by commas. They are output, separated by single spaces, -followed by a newline. The statement looks like this: - -@example -print @var{item1}, @var{item2}, @dots{} -@end example - -@noindent -The entire list of items may optionally be enclosed in parentheses. The -parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a redirection -(@pxref{Redirection}). The relational operators are @samp{==}, -@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and -@samp{!~} (@pxref{Comparison Ops}).@refill - -The items printed can be constant strings or numbers, fields of the -current record (such as @code{$1}), variables, or any @code{awk} -expressions. The @code{print} statement is completely general for -computing @emph{what} values to print. With one exception -(@pxref{Output Separators}), what you can't do is specify @emph{how} to -print them---how many columns to use, whether to use exponential -notation or not, and so on. For that, you need the @code{printf} -statement (@pxref{Printf}). - -To print a fixed piece of text, write a string constant as one item, -such as @w{@code{"Hello there"}}. If you forget to use the double--quote -characters, your text will be taken as an @code{awk} expression, and -you will probably get an error. 
Keep in mind that a space will be printed -between any two items. - -The simple statement @samp{print} with no items is equivalent to -@samp{print $0}: it prints the entire current record. To print a blank -line, use @samp{print ""}, where @code{""} is the null, or empty, -string. - -Most often, each @code{print} statement makes one line of output. But it -isn't limited to one line. If an item value is a string that contains a -newline, the newline is output along with the rest of the string. A -single @code{print} can make any number of lines this way. - -@node Print Examples, Output Separators, Print, Printing -@section Examples of @code{print} Statements - -Here is an example that prints the first two fields of each input record, -with a space between them: - -@example -awk '@{ print $1, $2 @}' inventory-shipped -@end example - -@noindent -Its output looks like this: - -@example -Jan 13 -Feb 15 -Mar 15 -@dots{} -@end example - -A common mistake in using the @code{print} statement is to omit the comma -between two items. This often has the effect of making the items run -together in the output, with no space. The reason for this is that -juxtaposing two string expressions in @code{awk} means to concatenate -them. For example, without the comma: - -@example -awk '@{ print $1 $2 @}' inventory-shipped -@end example - -@noindent -prints: - -@example -Jan13 -Feb15 -Mar15 -@dots{} -@end example - -Neither example's output makes much sense to someone unfamiliar with the -file @file{inventory-shipped}. A heading line at the beginning would make -it clearer. Let's add some headings to our table of months (@code{$1}) and -green crates shipped (@code{$2}). We do this using the BEGIN pattern -(@pxref{BEGIN/END}) to cause the headings to be printed only once: - -@c the formatting is strange here because the @{ becomes just a brace. 
-@example -awk 'BEGIN @{ print "Month Crates" - print "----- ------" @} - @{ print $1, $2 @}' inventory-shipped -@end example - -@noindent -Did you already guess what will happen? This program prints the following: - -@group -@example -Month Crates ------ ------ -Jan 13 -Feb 15 -Mar 15 -@dots{} -@end example -@end group - -@noindent -The headings and the table data don't line up! We can fix this by printing -some spaces between the two fields: - -@example -awk 'BEGIN @{ print "Month Crates" - print "----- ------" @} - @{ print $1, " ", $2 @}' inventory-shipped -@end example - -You can imagine that this way of lining up columns can get pretty -complicated when you have many columns to fix. Counting spaces for two -or three columns can be simple, but more than this and you can get -``lost'' quite easily. This is why the @code{printf} statement was -created (@pxref{Printf}); one of its specialties is lining up columns of -data. - -@node Output Separators, Redirection, Print Examples, Printing -@section Output Separators - -@cindex Output field separator, @code{OFS} -@vindex OFS -@vindex ORS -@cindex Output record separator, @code{ORS} -As mentioned previously, a @code{print} statement contains a list -of items, separated by commas. In the output, the items are normally -separated by single spaces. But they do not have to be spaces; a -single space is only the default. You can specify any string of -characters to use as the @dfn{output field separator}, by setting the -special variable @code{OFS}. The initial value of this variable -is the string @w{@code{" "}}. - -The output from an entire @code{print} statement is called an -@dfn{output record}. Each @code{print} statement outputs one output -record and then outputs a string called the @dfn{output record separator}. -The special variable @code{ORS} specifies this string. 
The initial -value of the variable is the string @code{"\n"} containing a newline -character; thus, normally each @code{print} statement makes a separate line. - -You can change how output fields and records are separated by assigning -new values to the variables @code{OFS} and/or @code{ORS}. The usual -place to do this is in the @code{BEGIN} rule (@pxref{BEGIN/END}), so -that it happens before any input is processed. You may also do this -with assignments on the command line, before the names of your input -files. - -The following example prints the first and second fields of each input -record separated by a semicolon, with a blank line added after each -line:@refill - -@example -awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @} - @{ print $1, $2 @}' BBS-list -@end example - -If the value of @code{ORS} does not contain a newline, all your output -will be run together on a single line, unless you output newlines some -other way. - -@node Redirection, Printf, Output Separators, Printing -@section Redirecting Output of @code{print} and @code{printf} - -@cindex Output redirection -@cindex Redirection of output -@cindex @code{>} -@cindex @code{>>} -@cindex @code{|} -@ignore -@strong{ADR: This section and the section on closing files and pipes should -come @emph{after} the section on @code{printf}. @emph{First} describe -all the options for output, and @emph{then} describe how to redirect -the output.} -@end ignore - -So far we have been dealing only with output that prints to the standard -output, usually your terminal. Both @code{print} and @code{printf} can be -told to send their output to other places. This is called -@dfn{redirection}.@refill - -A redirection appears after the @code{print} or @code{printf} statement. -Redirections in @code{awk} are written just like redirections in shell -commands, except that they are written inside the @code{awk} program. - -Here are the three forms of output redirection. 
They are all shown for -the @code{print} statement, but they work for @code{printf} also. - -@table @code -@item print @var{items} > @var{output-file} -This type of redirection prints the items onto the output file -@var{output-file}. The file name @var{output-file} can be any -expression. Its value is changed to a string and then used as a -filename (@pxref{Expressions}).@refill - -When this type of redirection is used, the @var{output-file} is erased -before the first output is written to it. Subsequent writes do not -erase @var{output-file}, but append to it. If @var{output-file} does -not exist, then it is created.@refill - -For example, here is how one @code{awk} program can write a list of -BBS names to a file @file{name-list} and a list of phone numbers to a -file @file{phone-list}. Each output file contains one name or number -per line. - -@example -awk '@{ print $2 > "phone-list" - print $1 > "name-list" @}' BBS-list -@end example - -@item print @var{items} >> @var{output-file} -This type of redirection prints the items onto the output file -@var{output-file}. The difference between this and the -single--@samp{>} redirection is that the old contents (if any) of -@var{output-file} are not erased. Instead, the @code{awk} output is -appended to the file. - -@cindex Pipes for output -@cindex Output, piping -@item print @var{items} | @var{command} -It is also possible to send output through a @dfn{pipe} instead of into a -file. This type of redirection opens a pipe to @var{command} and writes -the values of @var{items} through this pipe, to another process created -to execute @var{command}.@refill - -The redirection argument @var{command} is actually an @code{awk} -expression. Its value is converted to a string, whose contents give the -shell command to be run. 
- -For example, this produces two files, one unsorted list of BBS names -and one list sorted in reverse alphabetical order: - -@example -awk '@{ print $1 > "names.unsorted" - print $1 | "sort -r > names.sorted" @}' BBS-list -@end example - -Here the unsorted list is written with an ordinary redirection while -the sorted list is written by piping through the @code{sort} utility. - -Here is an example that uses redirection to mail a message to a mailing -list @samp{bug-system}. This might be useful when trouble is encountered -in an @code{awk} script run periodically for system maintenance. - -@example -print "Awk script failed:", $0 | "mail bug-system" -print "processing record number", FNR, "of", FILENAME | "mail bug-system" -close ("mail bug-system") -@end example - -We use a @code{close} statement here because it's a good idea to close -the pipe as soon as all the intended output has been sent to it. -@xref{Close Output}, for more information on this. -@end table - -Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system -to open a file or pipe only if the particular @var{file} or @var{command} -you've specified has not already been written to by your program.@refill - -@node Close Output, , , Redirection -@subsection Closing Output Files and Pipes -@cindex @code{close} statement for output -@cindex Closing files and pipes - -When a file or pipe is opened, the filename or command associated with -it is remembered by @code{awk} and subsequent writes to the same file or -command are appended to the previous writes. The file or pipe stays -open until @code{awk} exits. This is usually convenient. - -Sometimes there is a reason to close an output file or pipe earlier -than that. To do this, use the @code{close} command, as follows: - -@example -close (@var{filename}) -@end example - -@noindent -or - -@example -close (@var{command}) -@end example - -The argument @var{filename} or @var{command} can be any expression. 
-Its value must exactly equal the string used to open the file or pipe -to begin with---for example, if you open a pipe with this: - -@example -print $1 | "sort -r > names.sorted" -@end example - -@noindent -then you must close it with this: - -@example -close ("sort -r > names.sorted") -@end example - -Here are some reasons why you might need to close an output file: - -@itemize @bullet -@item -To write a file and read it back later on in the same @code{awk} -program. Close the file when you are finished writing it; then -you can start reading it with @code{getline} (@pxref{Getline}). - -@item -To write numerous files, successively, in the same @code{awk} -program. If you don't close the files, eventually you will exceed the -system limit on the number of open files in one process. So close -each one when you are finished writing it. - -@item -To make a command finish. When you redirect output through a pipe, -the command reading the pipe normally continues to try to read input -as long as the pipe is open. Often this means the command cannot -really do its work until the pipe is closed. For example, if you -redirect output to the @code{mail} program, the message will not -actually be sent until the pipe is closed. - -@item -To run the same subprogram a second time, with the same arguments. -This is not the same thing as giving more input to the first run! - -For example, suppose you pipe output to the @code{mail} program. If you -output several lines redirected to this pipe without closing it, they make -a single message of several lines. By contrast, if you close the pipe -after each line of output, then each line makes a separate message. -@end itemize - -@node Printf, , Redirection, Printing -@section Using @code{printf} Statements For Fancier Printing -@cindex Formatted output -@cindex Output, formatted - -If you want more precise control over the output format than -@code{print} gives you, use @code{printf}. 
With @code{printf} you can -specify the width to use for each item, and you can specify various -stylistic choices for numbers (such as what radix to use, whether to -print an exponent, whether to print a sign, and how many digits to print -after the decimal point). You do this by specifying a @dfn{format -string}. - -@menu -* Basic Printf:: Syntax of the @code{printf} statement. -* Format-Control:: Format-control letters. -* Modifiers:: Format--specification modifiers. -* Printf Examples:: Several examples. -@end menu - -@node Basic Printf, Format-Control, , Printf -@subsection Introduction to the @code{printf} Statement - -@cindex @code{printf} statement, format of -The @code{printf} statement looks like this:@refill - -@example -printf @var{format}, @var{item1}, @var{item2}, @dots{} -@end example - -@noindent -The entire list of items may optionally be enclosed in parentheses. The -parentheses are necessary if any of the item expressions uses a -relational operator; otherwise it could be confused with a redirection -(@pxref{Redirection}). The relational operators are @samp{==}, -@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and -@samp{!~} (@pxref{Comparison Ops}).@refill - -@cindex Format string -The difference between @code{printf} and @code{print} is the argument -@var{format}. This is an expression whose value is taken as a string; its -job is to say how to output each of the other arguments. It is called -the @dfn{format string}. - -The format string is essentially the same as in the C library function -@code{printf}. Most of @var{format} is text to be output verbatim. -Scattered among this text are @dfn{format specifiers}, one per item. -Each format specifier says to output the next item at that place in the -format.@refill - -The @code{printf} statement does not automatically append a newline to its -output. It outputs nothing but what the format specifies. So if you want -a newline, you must include one in the format. 
The output separator -variables @code{OFS} and @code{ORS} have no effect on @code{printf} -statements. - -@node Format-Control, Modifiers, Basic Printf, Printf -@subsection Format--Control Characters -@cindex @code{printf}, format-control characters - - -@cindex Format specifier -A format specifier starts with the character @samp{%} and ends with a -@dfn{format--control letter}; it tells the @code{printf} statement how -to output one item. (If you actually want to output a @samp{%}, write -@samp{%%}.) The format--control letter specifies what kind of value to -print. The rest of the format specifier is made up of optional -@dfn{modifiers} which are parameters such as the field width to use. - -Here is a list of them: - -@table @samp -@item c -This prints a number as an ASCII character. Thus, @samp{printf "%c", -65} outputs the letter @samp{A}. The output for a string value is -the first character of the string. - -@item d -This prints a decimal integer. - -@item e -This prints a number in scientific (exponential) notation. -For example, - -@example -printf "%4.3e", 1950 -@end example - -@noindent -prints @samp{1.950e+03}, with a total of 4 significant figures of -which 3 follow the decimal point. The @samp{4.3} are @dfn{modifiers}, -discussed below. - -@item f -This prints a number in floating point notation. - -@item g -This prints either scientific notation or floating point notation, whichever -is shorter. - -@item o -This prints an unsigned octal integer. - -@item s -This prints a string. - -@item x -This prints an unsigned hexadecimal integer. - -@item % -This isn't really a format--control letter, but it does have a meaning -when used after a @samp{%}: the sequence @samp{%%} outputs one -@samp{%}. It does not consume an argument. 
-@end table - -@node Modifiers, Printf Examples, Format-Control, Printf -@subsection Modifiers for @code{printf} Formats - -@cindex @code{printf}, modifiers -@cindex Modifiers (in format specifiers) -A format specification can also include @dfn{modifiers} that can control -how much of the item's value is printed and how much space it gets. The -modifiers come between the @samp{%} and the format--control letter. Here -are the possible modifiers, in the order in which they may appear: - -@table @samp -@item - -The minus sign, used before the width modifier, says to left--justify -the argument within its specified width. Normally the argument -is printed right--justified in the specified width. - -@item @var{width} -This is a number representing the desired width of a field. Inserting any -number between the @samp{%} sign and the format control character forces the -field to be expanded to this width. The default way to do this is to -pad with spaces on the left. - -@item .@var{prec} -This is a number that specifies the precision to use when printing. -This specifies the number of digits you want printed to the right of the -decimal place. -@end table - -The C library @code{printf}'s dynamic @var{width} and @var{prec} -capability (for example, @code{"%*.*s"}) is not supported. However, it can -be easily simulated using concatenation to dynamically build the -format string. - -@node Printf Examples, , Modifiers, Printf -@subsection Examples of Using @code{printf} - -Here is how to use @code{printf} to make an aligned table: - -@example -awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -@noindent -prints the names of bulletin boards (@code{$1}) of the file -@file{BBS-list} as a string of 10 characters, left justified. It also -prints the phone numbers (@code{$2}) afterward on the line. 
This will -produce an aligned two--column table of names and phone numbers, like so:@refill - -@example -aardvark 555-5553 -alpo-net 555-3412 -barfly 555-7685 -bites 555-1675 -camelot 555-0542 -core 555-2912 -fooey 555-1234 -foot 555-6699 -macfoo 555-6480 -sdace 555-3430 -sabafoo 555-2127 -@end example - -Did you notice that we did not specify that the phone numbers be -printed as numbers? They had to be printed as strings because the -numbers are separated by a dash. This dash would be interpreted as a -@dfn{minus} sign if we had tried to print the phone numbers as -numbers. This would have led to some pretty confusing results. - -We did not specify a width for the phone numbers because they are the -last things on their lines. We don't need to put spaces after them. - -We could make our table look even nicer by adding headings to the tops of -the columns. To do this, use the BEGIN pattern (@pxref{BEGIN/END}) to cause -the header to be printed only once, at the beginning of the @code{awk} -program: - -@example -awk 'BEGIN @{ print "Name Number" - print "---- ------" @} - @{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -Did you notice that we mixed @code{print} and @code{printf} statements in -the above example? We could have used just @code{printf} statements to get -the same results: - -@example -awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number" - printf "%-10s %s\n", "----", "------" @} - @{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -@noindent -By outputting each column heading with the same format specification -used for the elements of the column, we have made sure that the headings -will be aligned just like the columns. 
- -The fact that the same format specification is used can be emphasized -by storing it in a variable, like so: - -@example -awk 'BEGIN @{ format = "%-10s %s\n" - printf format, "Name", "Number" - printf format, "----", "------" @} - @{ printf format, $1, $2 @}' BBS-list -@end example - -See if you can use the @code{printf} statement to line up the headings and -table data for our @file{inventory-shipped} example covered earlier in the -section on the @code{print} statement (@pxref{Print}). - -@node One-liners, Patterns, Printing, Top -@chapter Useful ``One-liners'' - -@cindex One-liners -Useful @code{awk} programs are often short, just a line or two. Here is a -collection of useful, short programs to get you started. Some of these -programs contain constructs that haven't been covered yet. The description -of the program will give you a good idea of what is going on, but please -read the rest of the manual to become an @code{awk} expert! - -@table @code -@item awk '@{ num_fields = num_fields + NF @} -@itemx @code{ END @{ print num_fields @}'} -This program prints the total number of fields in all input lines. - -@item awk 'length($0) > 80' -This program prints every line longer than 80 characters. The sole -rule has a relational expression as its pattern, and has no action (so the -default action, printing the record, is used). - -@item awk 'NF > 0' -This program prints every line that has at least one field. This is an -easy way to delete blank lines from a file (or rather, to create a new -file similar to the old file but from which the blank lines have been -deleted). - - -@item awk '@{ if (NF > 0) print @}' -This program also prints every line that has at least one field. Here we -allow the rule to match every line, then decide in the action whether -to print. - -@item awk 'BEGIN @{ for (i = 1; i <= 7; i++) -@itemx @code{ print int(101 * rand()) @}'} -This program prints 7 random numbers from 0 to 100, inclusive. 
- -@item ls -l @var{files} | awk '@{ x += $4 @} ; END @{ print "total bytes: " x @}' -This program prints the total number of bytes used by @var{files}. - -@item expand @var{file} | awk '@{ if (x < length()) x = length() @} -@itemx @code{ END @{ print "maximum line length is " x @}'} -This program prints the maximum line length of @var{file}. The input -is piped through the @code{expand} program to change tabs into spaces, -so the widths compared are actually the right--margin columns. -@end table - -@node Patterns, Actions, One-liners, Top -@chapter Patterns - -@cindex Patterns, definition of -@cindex Patterns, types of -Patterns control the execution of rules: a rule is executed when its -pattern matches the input record. The @code{awk} language provides -several special patterns that are described in the sections that -follow. Patterns include:@refill - -@ignore -@strong{I think the ordering here needs to be rearranged. @code{BEGIN} -and @code{END} first, then @var{null}, /@var{regexp}/, @var{condexp}, -@var{condexp bool condexp}, @var{exp1} ? @var{exp2} : @var{exp3}, and -finally the range pattern.} -@end ignore - -@table @asis -@item @var{null} -The empty pattern, which matches every input record. (@xref{Empty, , The -Empty Pattern}.) - -@item /@var{regular expression}/ -A regular expression as a pattern. It matches when the text of the -input record fits the regular expression. (@xref{Regexp, , Regular -Expressions as Patterns}.) - -@item @var{condexp} -A single comparison expression. It matches when it is true. -(@xref{Comparison Patterns, , Comparison Expressions as Patterns}.) - -@item @code{BEGIN} -@itemx @code{END} -Special patterns to supply start--up or clean--up information to -@code{awk}. (@xref{BEGIN/END, , @code{BEGIN} and @code{END} Special -Patterns}.) - -@item @var{pat1}, @var{pat2} -A pair of patterns separated by a comma, specifying a range of records. -(@xref{Ranges, , Specifying Record Ranges With Patterns}.) 
- -@item @var{condexp1} @var{boolean} @var{condexp2} -A @dfn{compound} pattern, which combines expressions with the operators -@samp{and}, @code{&&}, and @samp{or}, @code{||}. (@xref{Boolean, , -Boolean Operators and Patterns}.) - -@item ! @var{condexp} -The pattern @var{condexp} is evaluated. Then the @code{!} performs a -boolean ``not'' or logical negation operation; if the input line matches -the pattern in @var{condexp} then the associated action is @emph{not} -executed. If the input line did not match that pattern, then the action -@emph{is} executed. (@xref{Boolean, , Boolean Operators and Patterns}.) - -@item (@var{expr}) -Parentheses may be used to control how operators nest. - -@item @var{pat1} ? @var{pat2} : @var{pat3} -The first pattern is evaluated. If it is true, the input line is tested -against the second pattern, otherwise it is tested against the third. -(@xref{Conditional Patterns, , Conditional Patterns}.) -@end table - -@menu -The following subsections describe these forms in detail: - -* Empty:: The empty pattern, which matches every record. - -* Regexp:: Regular expressions such as @samp{/foo/}. - -* Comparison Patterns:: Comparison expressions such as @samp{$1 > 10}. - -* Boolean:: Combining comparison expressions. - -* Ranges:: Using pairs of patterns to specify record ranges. - -* BEGIN/END:: Specifying initialization and cleanup rules. - -* Conditional Patterns:: Patterns such as @samp{pat1 ? pat2 : pat3}. -@end menu - -@node Empty, Regexp, , Patterns -@section The Empty Pattern - -@cindex Empty pattern -@cindex Pattern, empty -An empty pattern is considered to match @emph{every} input record. For -example, the program:@refill - -@example -awk '@{ print $1 @}' BBS-list -@end example - -@noindent -prints just the first field of every record. 
- -@node Regexp, Comparison Patterns, Empty, Patterns -@section Regular Expressions as Patterns -@cindex Pattern, regular expressions -@cindex Regexp -@cindex Regular expressions as patterns - -A @dfn{regular expression}, or @dfn{regexp}, is a way of describing -classes of strings. When enclosed in slashes (@code{/}), it makes -an @code{awk} pattern that matches every input record that contains -a match for the regexp. - -The simplest regular expression is a sequence of letters, numbers, or -both. Such a regexp matches any string that contains that sequence. -Thus, the regexp @samp{foo} matches any string containing @samp{foo}. -(More complicated regexps let you specify classes of similar strings.) - -@menu -* Usage: Regexp Usage. How regexps are used in patterns. -* Operators: Regexp Operators. How to write a regexp. -@end menu - -@node Regexp Usage, Regexp Operators, , Regexp -@subsection How to use Regular Expressions - -When you enclose @samp{foo} in slashes, you get a pattern that matches -a record that contains @samp{foo}. For example, this prints the second -field of each record that contains @samp{foo} anywhere: - -@example -awk '/foo/ @{ print $2 @}' BBS-list -@end example - -@cindex Regular expression matching operators -@cindex String-matching operators -@cindex Operators, string-matching -@cindex Operators, regular expression matching -@cindex regexp search operators -Regular expressions can also be used in comparison expressions. Then -you can specify the string to match against; it need not be the entire -current input record. These comparison expressions can be used as -patterns or in @code{if} and @code{while} statements. - -@table @code -@item @var{exp} ~ /@var{regexp}/ -This is true if the expression @var{exp} (taken as a character string) is -matched by @var{regexp}. 
The following example matches, or selects, all -input records with the letter @samp{J} in the first field:@refill - -@example -awk '$1 ~ /J/' inventory-shipped -@end example - -So does this: - -@example -awk '@{ if ($1 ~ /J/) print @}' inventory-shipped -@end example - -@item @var{exp} !~ /@var{regexp}/ -This is true if the expression @var{exp} (taken as a character string) is -@emph{not} matched by @var{regexp}. The following example matches, or -selects, all input records whose first field @emph{does not} contain the -letter @samp{J}:@refill - -@example -awk '$1 !~ /J/' inventory-shipped -@end example -@end table - -@cindex Computed Regular Expressions -@cindex Regular Expressions, Computed -@cindex Dynamic Regular Expressions -@cindex Regular Expressions, Dynamic -The right hand side of a @code{~} or @code{!~} operator need not be -a constant regexp (i.e. a string of characters between @samp{/}s). It can -also be @dfn{computed}, or @dfn{dynamic}. For example: - -@example -identifier = "[A-Za-z_][A-Za-z_0-9]*" -$0 ~ identifier -@end example - -@noindent -sets @code{identifier} to a regexp that describes @code{awk} variable -names, and tests if the input record matches this regexp. - -A dynamic regexp may actually be any expression. The expression is -evaluated, and the result is treated as a string that describes a -regular expression. - -@node Regexp Operators, , Regexp Usage, Regexp -@subsection Regular Expression Operators -@cindex Metacharacters -@cindex Regular expression, metacharacters - -You can combine regular expressions with the following characters, -called @dfn{regular expression operators}, or @dfn{metacharacters}, to -increase the power and versatility of regular expressions. This is -a table of metacharacters: - -@table @code -@item \ -This is used to suppress the special meaning of a character when -matching. For example: - -@example -\$ -@end example - -@noindent -matches the character @samp{$}. 
- -@item ^ -This matches the beginning of the string or the beginning of a line -within the string. For example: - -@example -^@@chapter -@end example - -@noindent -matches the @samp{@@chapter} at the beginning of a string, and can be used -to identify chapter beginnings in Texinfo source files. - -@item $ -This is similar to @code{^}, but it matches only at the end of a string -or the end of a line within the string. For example: - -@example -/p$/ -@end example - -@noindent -as a pattern matches a record that ends with a @samp{p}. - -@item . -This matches any single character except a newline. For example: - -@example -.P -@end example - -@noindent -matches any single character followed by a @samp{P} in a string. Using -concatenation we can make regular expressions like @samp{U.A}, which matches -any three--character string that begins with @samp{U} and ends with @samp{A}. - -@item [@dots{}] -This is called a @dfn{character set}. It matches any one of a group of -characters that are enclosed in the square brackets. For example: - -@example -[MVX] -@end example - -@noindent -matches any of the characters @samp{M}, @samp{V}, or @samp{X} in a -string.@refill - -Ranges of characters are indicated by using a hyphen between the beginning -and ending characters, and enclosing the whole thing in brackets. For -example:@refill - -@example -[0-9] -@end example - -@noindent -matches any string that contains a digit. - -Note that special patterns have to be followed to match the characters, -@samp{]}, @samp{-}, and @samp{^} when they are enclosed in the square -brackets. To match a @samp{]}, make it the first character in the set. -For example: - -@example -[]d] -@end example - -@noindent -matches either @samp{]}, or @samp{d}.@refill - -To match @samp{-}, write it as @samp{---}, which is a range containing only -@samp{-}. You may also make the @samp{-} be the first or last character -in the set. To match @samp{^}, make it any character except the first one of -a set. 
- -@item [^ @dots{}] -This is the @dfn{complemented character set}. The first character after -the @samp{[} @emph{must} be a @samp{^}. This matches any characters -@emph{except} those in the square brackets. For example: - -@example -[^0-9] -@end example - -@noindent -matches any characters that are not digits. - -@item | -This is the @dfn{alternation operator} and it is used to specify -alternatives. For example: - -@example -^P|[0-9] -@end example - -@noindent -matches any string that matches either @samp{^P} or @samp{[0-9]}. This -means it matches any string that contains a digit or starts with @samp{P}. - -@item (@dots{}) -Parentheses are used for grouping in regular expressions as in -arithmetic. They can be used to concatenate regular expressions -containing the alternation operator, @samp{|}. - -@item * -This symbol means that the preceding regular expression is to be -repeated as many times as possible to find a match. For example: - -@example -ph* -@end example - -@noindent -applies the @code{*} symbol to the preceding @samp{h} and looks for matches -to one @samp{p} followed by any number of @samp{h}'s. This will also match -just @samp{p} if no @samp{h}'s are present. - -The @code{*} means repeat the @emph{smallest} possible preceding expression -in order to find a match. The @code{awk} language processes a @code{*} by -matching as many repetitions as can be found. For example: - -@example -awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample -@end example - -@noindent -matches every record in the input containing a string of the form -@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.@refill - -@item + -This symbol is similar to @code{*}, but the preceding expression must be -matched at least once. This means that: - -@example -wh+y -@end example - -@noindent -would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas @samp{wh*y} -would match all three of these strings. 
And this is a simpler -way of writing the last @samp{*} example: - -@example -awk '/\(c[ad]+r x\)/ @{ print @}' sample -@end example - -@item ? -This symbol is similar to @code{*}, but the preceding expression can be -matched once or not at all. For example: - -@example -fe?d -@end example - -@noindent -will match @samp{fed} or @samp{fd}, but nothing else.@refill -@end table - -In regular expressions, the @code{*}, @code{+}, and @code{?} operators have -the highest precedence, followed by concatenation, and finally by @code{|}. -As in arithmetic, parentheses can change how operators are grouped.@refill - -Any other character stands for itself. However, it is important to note -that case in regular expressions @emph{is} significant, both when matching -ordinary (i.e. non--metacharacter) characters, and inside character sets. -Thus a @samp{w} in a regular expression matches only a lower case @samp{w} -and not either an uppercase or lowercase @samp{w}. When you want to -do a case--independent match, you have to use a character set: @samp{[Ww]}. - -@node Comparison Patterns, Ranges, Regexp, Patterns -@section Comparison Expressions as Patterns -@cindex Comparison expressions as patterns -@cindex Pattern, comparison expressions -@cindex Relational operators -@cindex Operators, relational - -@dfn{Comparison patterns} use @dfn{relational operators} to compare -strings or numbers. The relational operators are the same as in C. -Here is a table of them: - -@table @code -@item @var{x} < @var{y} -True if @var{x} is less than @var{y}. - -@item @var{x} <= @var{y} -True if @var{x} is less than or equal to @var{y}. - -@item @var{x} > @var{y} -True if @var{x} is greater than @var{y}. - -@item @var{x} >= @var{y} -True if @var{x} is greater than or equal to @var{y}. - -@item @var{x} == @var{y} -True if @var{x} is equal to @var{y}. - -@item @var{x} != @var{y} -True if @var{x} is not equal to @var{y}. 
-@end table - -Comparison expressions can be used as patterns to control whether a -rule is executed. The expression is evaluated for each input record -read, and the pattern is considered matched if the condition is -@dfn{true}. - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (@pxref{Conversion}). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, @code{"10"} is less than @code{"9"}. - -The following example prints the second field of each input record -whose first field is precisely @samp{foo}. - -@example -awk '$1 == "foo" @{ print $2 @}' BBS-list -@end example - -@noindent -Contrast this with the following regular expression match, which would -accept any record with a first field that contains @samp{foo}: - -@example -awk '$1 ~ "foo" @{ print $2 @}' BBS-list -@end example - -@node Ranges, BEGIN/END, Comparison Patterns, Patterns -@section Specifying Record Ranges With Patterns - -@cindex Range pattern -@cindex patterns, range -A @dfn{range pattern} is made of two patterns separated by a comma: -@samp{@var{begpat}, @var{endpat}}. It matches ranges of consecutive -input records. The first pattern @var{begpat} controls where the -range begins, and the second one @var{endpat} controls where it ends. - -They work as follows: @var{begpat} is matched against every input -record; when a record matches @var{begpat}, the range pattern becomes -@dfn{turned on}. The range pattern matches this record. As long as it -stays turned on, it automatically matches every input record read. But -meanwhile, @var{endpat} is matched against every input record, and when -it matches, the range pattern is turned off again for the following -record. Now we go back to checking @var{begpat} against each record. 
-For example:@refill - -@example -awk '$1 == "on", $1 == "off"' -@end example - -@noindent -prints every record between on/off pairs, inclusive. - -The record that turns on the range pattern and the one that turns it -off both match the range pattern. If you don't want to operate on -these records, you can write @code{if} statements in the rule's action -to distinguish them. - -It is possible for a pattern to be turned both on and off by the same -record, if both conditions are satisfied by that record. Then the action is -executed for just that record. - -@node BEGIN/END, Boolean, Ranges, Patterns -@section @code{BEGIN} and @code{END} Special Patterns - -@cindex @code{BEGIN}, special pattern -@cindex Patterns, @code{BEGIN} -@cindex @code{END}, special pattern -@cindex Patterns, @code{END} -@code{BEGIN} and @code{END} are special patterns. They are not used to -match input records. Rather, they are used for supplying start--up or -clean--up information to your @code{awk} script. A @code{BEGIN} rule is -executed, once, before the first input record has been read. An @code{END} -rule is executed, once, after all the input has been read. For -example:@refill - -@example -awk 'BEGIN @{ print "Analysis of ``foo'' program" @} - /foo/ @{ ++foobar @} - END @{ print "``foo'' appears " foobar " times." @}' BBS-list -@end example - -This program finds out how many times the string @samp{foo} appears in the -input file @file{BBS-list}. The @code{BEGIN} pattern prints out a title -for the report. There is no need to use the @code{BEGIN} pattern to -initialize the counter @code{foobar} to zero, as @code{awk} does this for -us automatically (@pxref{Variables}). -The second rule increments the variable @code{foobar} -every time a record containing the pattern @samp{foo} is read. The last -rule prints out the value of @code{foobar} at the end of the run.@refill - -The special patterns @code{BEGIN} and @code{END} do not combine with -other kinds of patterns. 
- -An @code{awk} program may have multiple @code{BEGIN} and/or @code{END} -rules. The contents of multiple @code{BEGIN} or @code{END} rules are -treated as if they had been enclosed in a single rule, in the order -that the rules are encountered in the @code{awk} program. (This feature -was introduced with the new version of @code{awk}.) - -Multiple @code{BEGIN} and @code{END} sections are also useful -for writing library functions that need to do initialization and/or cleanup -of their own. Note that the order in which library functions are named -on the command line will affect the order in which their @code{BEGIN} -and @code{END} rules will be executed. Therefore you have to be careful -how you write your library functions. (@xref{Command Line}, for more -information on using library functions.) - -If an @code{awk} program only has a @code{BEGIN} rule, and no other -rules, then the program will exit after the @code{BEGIN} rule has been -run. Older versions of @code{awk} used to read their input until end of -file was seen. However, if an @code{END} rule exists as well, then the -input will be read, even if there are no other rules in the program. - -@code{BEGIN} and @code{END} rules must have actions; there is no default -action for these rules since there is no current record when they run. - -@node Boolean, Conditional Patterns, BEGIN/END, Patterns -@section Boolean Operators and Patterns -@cindex Patterns, boolean -@cindex Boolean patterns - -A boolean pattern is a combination of other patterns using the boolean -operators ``or'' (@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), -along with parentheses to control nesting. Whether the boolean pattern -matches an input record is computed from whether its subpatterns match. - -The subpatterns of a boolean pattern can be regular expressions, -matching expressions, comparisons, or other boolean combinations of -such. 
Range patterns cannot appear inside boolean operators, since they -don't make sense for classifying a single record, and neither can the -special patterns @code{BEGIN} and @code{END}, which never match any -input record. - -Here are descriptions of the three boolean operators. - -@table @code -@item @var{pat1} && @var{pat2} -Matches if both @var{pat1} and @var{pat2} match by themselves. For -example, the following command prints all records in the input file -@file{BBS-list} that contain both @samp{2400} and @samp{foo}.@refill - -@example -awk '/2400/ && /foo/' BBS-list -@end example - -Whether @var{pat2} matches is tested only if @var{pat1} succeeds. This -can make a difference when @var{pat2} contains expressions that have -side effects: in the case of @samp{/foo/ && ($2 == bar++)}, the variable -@code{bar} is not incremented if there is no @samp{foo} in the record.@refill - -@item @var{pat1} || @var{pat2} -Matches if at least one of @var{pat1} and @var{pat2} matches the current -input record. For example, the following command prints all records in -the input file @file{BBS-list} that contain @emph{either} @samp{2400} or -@samp{foo}, or both.@refill - -@example -awk '/2400/ || /foo/' BBS-list -@end example - -Whether @var{pat2} matches is tested only if @var{pat1} fails to match. -This can make a difference when @var{pat2} contains expressions that -have side effects. - -@item !@var{pat} -Matches if @var{pat} does not match. For example, the following command -prints all records in the input file @file{BBS-list} that do @emph{not} -contain the string @samp{foo}. - -@example -awk '! /foo/' BBS-list -@end example -@end table - -Note that boolean patterns are built from other patterns just as boolean -expressions are built from other expressions (@pxref{Boolean Ops}). Any -boolean expression is also a valid boolean pattern. But the converse is -not true: simple regular expression patterns such as @samp{/foo/} are not -allowed in boolean expressions. 
Regular expressions can appear in boolean -expressions only in conjunction with the matching operators, @samp{~} -and @samp{!~}. - -@node Conditional Patterns, , Boolean, Patterns -@section Conditional Patterns -@cindex Conditional Patterns -@cindex Patterns, Conditional -@cindex Ternary Operator -@cindex Operator, Ternary - -Patterns may use a @dfn{conditional expression} much like the conditional -expression of the C language. This takes the form: - -@example -@var{pat1} ? @var{pat2} : @var{pat3} -@end example - -The first pattern is evaluated. If it evaluates to @var{true}, then the -input record is tested against @var{pat2}. Otherwise it is tested -against @var{pat3}. The conditional pattern matches if @var{pat2} or -@var{pat3} (whichever one is selected) matches.@refill - -@node Actions, Expressions, Patterns, Top -@chapter Actions: The Basics -@cindex Action, general -@cindex Curly braces -@cindex Action, curly braces -@cindex Action, separating statements - -The @dfn{action} part of an @code{awk} rule tells @code{awk} what to do -once a match for the pattern is found. An action consists of one or more -@code{awk} @dfn{statements}, enclosed in curly braces (@samp{@{} and -@samp{@}}). The curly braces must be used even if the action contains only -one statement, or even if it contains no statements at all. Action statements -are separated by newlines or semicolons.@refill - -Besides the print statements already covered (@pxref{Printing}), there are -four kinds of action statements: expressions, control statements, compound -statements, and function definitions.@refill - -@itemize @bullet -@item -@cindex Expressions -@dfn{Expressions} include assignments, arithmetic, function calls, and more -(@pxref{Expressions}).@refill - -@item -@cindex Statements -@dfn{Control statements} specify the control flow of @code{awk} programs. 
The -@code{awk} language gives you C--like constructs (@code{if}, @code{for}, -@code{while}, and so on) as well as a few special ones -(@pxref{Statements}).@refill - -@item -@cindex Compound statements -A @dfn{compound statement} is just one or more @code{awk} statements -enclosed in curly braces. This way you can group several statements -to form the body of an @code{if} or similar statement. - -@item -@cindex Function definitions -You can define @dfn{user--defined functions} for use elsewhere in the -@code{awk} program (@pxref{User-defined}). -@end itemize - -@iftex -The next two chapters will cover in detail expressions and control statements, -respectively. -We will then detour for a chapter to talk about arrays. -@c (@strong{This is poor organization!!!}) -Then the following two chapters will deal with compound statements and -user--defined functions, respectively.@refill -@end iftex - -@node Expressions, Statements, Actions, Top -@chapter Actions: Expressions - -Expressions are the basic building block of @code{awk} actions. An -expression evaluates to a value, which you can print, test, store in a -variable or pass to a function. - -But, beyond that, an expression can assign a new value to a variable -or a field, with an assignment operator. - -An expression can serve as a statement on its own. Most other action -statements are made up of various combinations of expressions. As in -other languages, expressions in @code{awk} include variables, array -references, constants, and function calls, as well as combinations of -these with various operators. - -@menu -* Constants:: String and numeric constants. -* Variables:: Variables give names to values for future use. -* Fields:: Field references such as @code{$1} are also expressions. -* Arrays:: Array element references are expressions. - -* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.) -* Concatenation:: Concatenating strings. 
-* Comparison Ops:: Comparison of numbers and strings with @samp{<}, etc. -* Boolean Ops:: Combining comparison expressions using boolean operators - @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not''). - -* Assignment Ops:: Changing the value of a variable or a field. -* Increment Ops:: Incrementing the numeric value of a variable. - -* Conversion:: The conversion of strings to numbers and vice versa. -* Conditional Exp:: Conditional expressions select between two subexpressions - under control of a third subexpression. -* Function Calls:: A function call is an expression. -@end menu - -@node Constants, Variables, , Expressions -@section Constant Expressions -@cindex Constants, types of -@cindex String constants -@cindex String value - -There are two types of constants: numeric constants and string constants. - -@cindex Numerical constant -@cindex Numerical value -The @dfn{numeric constant} is a number. This number can be an integer, a -decimal fraction, or a number in scientific (exponential) notation. Note that -all numeric values are represented within @code{awk} in double--precision -floating point. Here are some examples of numeric constants, which all -have the same value: - -@example -105 -1.05e+2 -1050e-1 -@end example - -A string constant consists of a sequence of characters enclosed in -double--quote marks. For example: - -@example -"parrot" -@end example - -@noindent -@cindex Differences between @code{gawk} and @code{awk} -represents the string constant @samp{parrot}. Strings in @code{gawk} can -be of any length and they can contain all the possible 8--bit ASCII -characters including ASCII NUL. Other @code{awk} implementations may -have difficulty with some character codes.@refill - -@cindex Escape sequence notation -Some characters cannot be included literally in a string. You represent -them instead with @dfn{escape sequences}, which are character sequences -beginning with a backslash (@samp{\}). 
- -One use of the backslash is to include double--quote characters in a string. -Since a plain double--quote would end the string, you must use @samp{\"}. -Backslash itself is another character that can't be included normally; -you write @samp{\\} to put one backslash in the string. - -Another use of backslash is to represent unprintable characters -such as newline. While there is nothing to stop you from writing these -characters directly in an @code{awk} program, they may look ugly. - -@table @code -@item \b -Represents a backspace, @samp{@ctrl{H}}. - -@item \f -Represents a formfeed, @samp{@ctrl{L}}. - -@item \n -Represents a newline, @samp{@ctrl{J}}. - -@item \r -Represents a carriage return, @samp{@ctrl{M}}. - -@item \t -Represents a horizontal tab, @samp{@ctrl{I}}. - -@item \v -Represents a vertical tab, @samp{@ctrl{K}}. - -@item \@var{nnn} -Represents the octal value @var{nnn}, where @var{nnn} is one to three digits -between 0 and 7. For example, the code for the ASCII ESC (escape) character -is @samp{\033}.@refill -@end table - -@node Variables, Arithmetic Ops, Constants, Expressions -@section Variables -@cindex Variables, user-defined -@cindex User-defined variables - -Variables let you give names to values and refer to them later. You have -already seen variables in many of the examples. The name of a variable -must be a sequence of letters, digits and underscores, but it may not begin -with a digit. Case is significant in variable names; @code{a} and @code{A} -are distinct variables. - -A variable name is a valid expression by itself; it represents the -variable's current value. Variables are given new values with -@dfn{assignment operators} and @dfn{increment operators}. -@xref{Assignment Ops}. - -@cindex Built-in variables -@cindex Variables, built-in -A few variables have special built--in meanings, such as @code{FS}, the -field separator, and @code{NF}, the number of fields in the current input -record. @xref{Special}, for a list of them. 
Special variables can -be used and assigned just like all other variables, but their values -are also used or changed automatically by @code{awk}. Each special -variable's name is made entirely of upper case letters. - -Variables in @code{awk} can be assigned either numeric values or string -values. By default, variables are initialized to the null string, which -has the numeric value zero. So there is no need to ``initialize'' -each variable explicitly in @code{awk}, the way you would need to do -in C or most other traditional programming languages. - -@node Arithmetic Ops, Concatenation, Variables, Expressions -@section Arithmetic Operators - -@cindex Arithmetic operators -@cindex Operators, arithmetic -The @code{awk} language uses the common arithmetic operators when -evaluating expressions. All of these arithmetic operators follow normal -precedence rules, and work as you would expect them to. This example -divides field 3 by field 4, adds field 2, stores the result into field -1, and prints the results: - -@example -awk '@{ $1 = $2 + $3 / $4; print @}' inventory-shipped -@end example - -The arithmetic operators in @code{awk} are: - -@table @code -@item @var{x} + @var{y} -Addition. - -@item @var{x} - @var{y} -Subtraction. - -@item - @var{x} -Negation. - -@item @var{x} / @var{y} -Division. Since all numbers in @code{awk} are double--precision -floating point, the result is not rounded to an integer: @samp{3 / 4} -has the value 0.75. - -@item @var{x} * @var{y} -Multiplication. - -@item @var{x} % @var{y} -@cindex Mod function, semantics of -@cindex Differences between @code{gawk} and @code{awk} -@c @strong{How are gawk and awk different here?} -Remainder. The quotient is rounded toward zero to an integer, -multiplied by @var{y} and this result is subtracted from @var{x}. -This operation is sometimes known as ``trunc--mod''. 
The following -relation always holds: - -@display -@code{b * int(a / b) + (a % b) == a} -@end display - -One undesirable effect of this definition of remainder is that -@var{x} % @var{y} is negative if @var{x} is negative. Thus, - -@example --17 % 8 = -1 -@end example - -@item @var{x} ^ @var{y} -@itemx @var{x} ** @var{y} -Exponentiation: @var{x} raised to the @var{y} power. @samp{2 ^ 3} has -the value 8. The character sequence @samp{**} is equivalent to -@samp{^}. -@end table - -@node Concatenation, Comparison Ops, Arithmetic Ops, Expressions -@section String Concatenation - -@cindex String operators -@cindex Operators, string -@cindex Concatenation -There is only one string operation: concatenation. It does not have a -specific operator to represent it. Instead, concatenation is performed by -writing expressions next to one another, with no operator. For example: - -@example -awk '@{ print "Field number one: " $1 @}' BBS-list -@end example - -@noindent -produces, for the first record in @file{BBS-list}: - -@example -Field number one: aardvark -@end example - -If you hadn't put the space after the @samp{:}, the line would have run -together. For example: - -@example -awk '@{ print "Field number one:" $1 @}' BBS-list -@end example - -@noindent -produces, for the first record in @file{BBS-list}: - -@example -Field number one:aardvark -@end example - -@node Comparison Ops, Boolean Ops, Concatenation, Expressions -@section Comparison Expressions -@cindex Comparison expressions -@cindex Expressions, comparison -@cindex Relational operators -@cindex Operators, relational - -@dfn{Comparison expressions} use @dfn{relational operators} to compare -strings or numbers. The relational operators are the same as in C. -Here is a table of them: - -@table @code -@item @var{x} < @var{y} -True if @var{x} is less than @var{y}. - -@item @var{x} <= @var{y} -True if @var{x} is less than or equal to @var{y}. - -@item @var{x} > @var{y} -True if @var{x} is greater than @var{y}. 
- -@item @var{x} >= @var{y} -True if @var{x} is greater than or equal to @var{y}. - -@item @var{x} == @var{y} -True if @var{x} is equal to @var{y}. - -@item @var{x} != @var{y} -True if @var{x} is not equal to @var{y}. - -@item @var{x} ~ @var{regexp} -True if regexp @var{regexp} matches the string @var{x}. - -@item @var{x} !~ @var{regexp} -True if regexp @var{regexp} does not match the string @var{x}. - -@item @var{subscript} in @var{array} -True if array @var{array} has an element with the subscript @var{subscript}. -@end table - -Comparison expressions have the value 1 if true and 0 if false. - -The operands of a relational operator are compared as numbers if they -are both numbers. Otherwise they are converted to, and compared as, -strings (@pxref{Conversion}). Strings are compared by comparing the -first character of each, then the second character of each, and so on. -Thus, @code{"10"} is less than @code{"9"}. - -For example, - -@example -$1 == "foo" -@end example - -@noindent -has the value of 1, or is true, if the first field of the current input -record is precisely @samp{foo}. By contrast, - -@example -$1 ~ /foo/ -@end example - -@noindent -has the value 1 if the first field contains @samp{foo}. - -@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions -@section Boolean Operators -@cindex Expressions, boolean -@cindex Boolean expressions -@cindex Operators, boolean -@cindex Boolean operators - -A boolean expression is a combination of comparison expressions or matching -expressions, using the boolean operators ``or'' (@samp{||}), ``and'' -(@samp{&&}), and ``not'' (@samp{!}), along with parentheses to control -nesting. The truth of the boolean expression is computed by combining the -truth values of the component expressions. - -Boolean expressions can be used wherever comparison and matching -expressions can be used. They can be used in @code{if} and @code{while} -statements. They have numeric values (1 if true, 0 if false). 
- -In addition, every boolean expression is also a valid boolean pattern, so -you can use it as a pattern to control the execution of rules. - -Here are descriptions of the three boolean operators, with an example of -each. It may be instructive to compare these examples with the analogous -examples of boolean patterns (@pxref{Boolean}), which use the same boolean -operators in patterns instead of expressions. - -@table @code -@item @var{boolean1} && @var{boolean2} -True if both @var{boolean1} and @var{boolean2} are true. For example, -the following statement prints the current input record if it contains -both @samp{2400} and @samp{foo}.@refill - -@example -if ($0 ~ /2400/ && $0 ~ /foo/) print -@end example - -The subexpression @var{boolean2} is evaluated only if @var{boolean1} -is true. This can make a difference when @var{boolean2} contains -expressions that have side effects: in the case of @samp{$0 ~ /foo/ && -($2 == bar++)}, the variable @code{bar} is not incremented if there is -no @samp{foo} in the record. - -@item @var{boolean1} || @var{boolean2} -True if at least one of @var{boolean1} and @var{boolean2} is true. -For example, the following command prints all records in the input -file @file{BBS-list} that contain @emph{either} @samp{2400} or -@samp{foo}, or both.@refill - -@example -awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list -@end example - -The subexpression @var{boolean2} is evaluated only if @var{boolean1} -is false. This can make a difference when @var{boolean2} contains -expressions that have side effects. - -@item !@var{boolean} -True if @var{boolean} is false. For example, the following program prints -all records in the input file @file{BBS-list} that do @emph{not} contain the -string @samp{foo}. - -@example -awk '@{ if (! 
($0 ~ /foo/)) print @}' BBS-list -@end example -@end table - -@node Assignment Ops, Increment Ops, Boolean Ops, Expressions -@section Assignment Operators - -@cindex Assignment operators -@cindex Operators, assignment -An @dfn{assignment} is an expression that stores a new value into a -variable. For example, let's assign the value 1 to the variable -@code{z}:@refill - -@example -z = 1 -@end example - -After this expression is executed, the variable @code{z} has the value 1. -Whatever old value @code{z} had before the assignment is forgotten. - -The @code{=} sign is called an @dfn{assignment operator}. It is the -simplest assignment operator because the value of the right--hand -operand is stored unchanged. - -@cindex Lvalue -The left--hand operand of an assignment can be a variable -(@pxref{Variables}), a field (@pxref{Changing Fields}) or an array -element (@pxref{Arrays}). These are all called @dfn{lvalues}, which -means they can appear on the left side of an assignment operator. The -right--hand operand may be any expression; it produces the new value -which the assignment stores in the specified variable, field or array -element. - -Assignments can store string values also. For example, this would store -the value @code{"this food is good"} in the variable @code{message}: - -@example -thing = "food" -predicate = "good" -message = "this " thing " is " predicate -@end example - -@noindent -(This also illustrates concatenation of strings.) - -It is important to note that variables do @emph{not} have permanent types. -The type of a variable is simply the type of whatever value it happens -to hold at the moment. In the following program fragment, the variable -@code{foo} has a numeric value at first, and a string value later on: - -@example -foo = 1 -print foo -foo = "bar" -print foo -@end example - -@noindent -When the second assignment gives @code{foo} a string value, the fact that -it previously had a numeric value is forgotten. 
- -An assignment is an expression, so it has a value: the same value that -is assigned. Thus, @samp{z = 1} as an expression has the value 1. -One consequence of this is that you can write multiple assignments together: - -@example -x = y = z = 0 -@end example - -@noindent -stores the value 0 in all three variables. It does this because the -value of @samp{z = 0}, which is 0, is stored into @code{y}, and then -the value of @samp{y = z = 0}, which is 0, is stored into @code{x}. - -You can use an assignment anywhere an expression is called for. For -example, it is valid to write @samp{x != (y = 1)} to set @code{y} to 1 -and then test whether @code{x} equals 1. But this style tends to make -programs hard to read; except in a one--shot program, you should -rewrite it to get rid of such nesting of assignments. This is never very -hard. - -Aside from @code{=}, there are several other assignment operators that -do arithmetic with the old value of the variable. For example, the -operator @code{+=} computes a new value by adding the right--hand value -to the old value of the variable. Thus, the following assignment adds -5 to the value of @code{foo}: - -@example -foo += 5 -@end example - -@noindent -This is precisely equivalent to the following: - -@example -foo = foo + 5 -@end example - -@noindent -Use whichever one makes the meaning of your program clearer. - -Here is a table of the arithmetic assignment operators. In each -case, the right--hand operand is an expression whose value is converted -to a number. - -@table @code -@item @var{lvalue} += @var{increment} -Adds @var{increment} to the value of @var{lvalue} to make the new value -of @var{lvalue}. - -@item @var{lvalue} -= @var{decrement} -Subtracts @var{decrement} from the value of @var{lvalue}. - -@item @var{lvalue} *= @var{coefficient} -Multiplies the value of @var{lvalue} by @var{coefficient}. - -@item @var{lvalue} /= @var{quotient} -Divides the value of @var{lvalue} by @var{quotient}. 
- -@item @var{lvalue} %= @var{modulus} -Sets @var{lvalue} to its remainder by @var{modulus}. - -@item @var{lvalue} ^= @var{power} -@itemx @var{lvalue} **= @var{power} -Raises @var{lvalue} to the power @var{power}. -@end table - -@node Increment Ops, Conversion, Assignment Ops, Expressions -@section Increment Operators - -@cindex Increment operators -@cindex Operators, increment -@dfn{Increment operators} increase or decrease the value of a variable -by 1. You could do the same thing with an assignment operator, so -the increment operators add no power to the @code{awk} language; but they -are convenient abbreviations for something very common. - -The operator to add 1 is written @code{++}. There are two ways to use -this operator: pre--incrementation and post--incrementation. - -To pre--increment a variable @var{v}, write @code{++@var{v}}. This adds -1 to the value of @var{v} and that new value is also the value of this -expression. The assignment expression @code{@var{v} += 1} is completely -equivalent. - -Writing the @code{++} after the variable specifies post--increment. This -increments the variable value just the same; the difference is that the -value of the increment expression itself is the variable's @emph{old} -value. Thus, if @code{foo} has value 4, then the expression @code{foo++} -has the value 4, but it changes the value of @code{foo} to 5. - -The post--increment @code{foo++} is nearly equivalent to writing @samp{(foo -+= 1) - 1}. It is not perfectly equivalent because all numbers in -@code{awk} are floating point: in floating point, @code{foo + 1 - 1} does -not necessarily equal @code{foo}. But the difference will be minute as -long as you stick to numbers that are fairly small (less than a trillion). - -Any lvalue can be incremented. Fields and array elements are incremented -just like variables. - -The decrement operator @code{--} works just like @code{++} except that -it subtracts 1 instead of adding. 
Like @code{++}, it can be used before -the lvalue to pre--decrement or after it to post--decrement. - -Here is a summary of increment and decrement expressions. - -@table @code -@item ++@var{lvalue} -This expression increments @var{lvalue} and the new value becomes the -value of this expression. - -@item @var{lvalue}++ -This expression causes the contents of @var{lvalue} to be incremented. -The value of the expression is the @emph{old} value of @var{lvalue}. - -@item --@var{lvalue} -Like @code{++@var{lvalue}}, but instead of adding, it subtracts. It -decrements @var{lvalue} and delivers the value that results. - -@item @var{lvalue}-- -Like @code{@var{lvalue}++}, but instead of adding, it subtracts. It -decrements @var{lvalue}. The value of the expression is the @emph{old} -value of @var{lvalue}. -@end table - -@node Conversion, Conditional Exp, Increment Ops, Expressions -@section Conversion of Strings and Numbers - -@cindex Conversion of strings and numbers -Strings are converted to numbers, and numbers to strings, if the context of -your @code{awk} statement demands it. For example, if the values of -@code{foo} or @code{bar} in the expression @code{foo + bar} happen to be -strings, they are converted to numbers before the addition is performed. -If numeric values appear in string concatenation, they are converted -to strings. Consider this:@refill - -@example -two = 2; three = 3 -print (two three) + 4 -@end example - -@noindent -This eventually prints the (numeric) value @samp{27}. The numeric -variables @code{two} and @code{three} are converted to strings and concatenated -together, and the resulting string is converted back to a number before -adding @samp{4}. The resulting numeric value @samp{27} is printed. - -If, for some reason, you need to force a number to be converted to a -string, concatenate the null string with that number. To force a string -to be converted to a number, add zero to that string. 
Strings that -can't be interpreted as valid numbers are given the numeric value -zero.@refill - -@vindex OFMT -The exact manner in which numbers are converted into strings is controlled -by the @code{awk} special variable @code{OFMT} (@pxref{Special}). -Numbers are converted using a special -version of the @code{sprintf} function (@pxref{Built-in}) with @code{OFMT} -as the format specifier.@refill - -@code{OFMT}'s default value is @code{"%.6g"}, which prints a value with -at most six significant digits. You might want to change it to specify -more precision, if your version of @code{awk} uses double precision -arithmetic. Double precision on most modern machines gives you 16 or 17 -decimal digits of precision.@refill - -Strange results can happen if you set @code{OFMT} to a string that doesn't -tell @code{sprintf} how to format floating point numbers in a useful way. -For example, if you forget the @samp{%} in the format, all numbers will be -converted to the same constant string.@refill - -@node Conditional Exp, Function Calls, Conversion, Expressions -@section Conditional Expressions -@cindex Conditional expression -@cindex Expression, conditional - -A @dfn{conditional expression} is a special kind of expression with -three operands. It allows you to use one expression's value to select -one of two other expressions. - -The conditional expression looks the same as in the C language: - -@example -@var{selector} ? @var{if-true-exp} : @var{if-false-exp} -@end example - -@noindent -There are three subexpressions. The first, @var{selector}, is always -computed first. If it is ``true'' (not zero) then @var{if-true-exp} is -computed next and its value becomes the value of the whole expression. -Otherwise, @var{if-false-exp} is computed next and its value becomes the -value of the whole expression. - -For example, this expression produces the absolute value of @code{x}: - -@example -x > 0 ? 
x : -x -@end example - -Each time the conditional expression is computed, exactly one of -@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored. -This is important when the expressions contain side effects. For example, -this conditional expression examines element @code{i} of either array -@code{a} or array @code{b}, and increments @code{i}. - -@example -x == y ? a[i++] : b[i++] -@end example - -@noindent -This is guaranteed to increment @code{i} exactly once, because each time -one or the other of the two increment expressions will be executed -and the other will not be. - -@node Function Calls, , Conditional Exp, Expressions -@section Function Calls -@cindex Function call -@cindex Calling a function - -A @dfn{function} is a name for a particular calculation. Because it has -a name, you can ask for it by name at any point in the program. For -example, the function @code{sqrt} computes the square root of a number. - -A fixed set of functions are @dfn{built in}, which means they are -available in every @code{awk} program. The @code{sqrt} function is one -of these. @xref{Built-in}, for a list of built--in functions and their -descriptions. In addition, you can define your own functions in the -program for use elsewhere in the same program. @xref{User-defined}, -for how to do this. - -@cindex Arguments in function call -The way to use a function is with a @dfn{function call} expression, -which consists of the function name followed by a list of -@dfn{arguments} in parentheses. The arguments are expressions which -give the raw materials for the calculation that the function will do. -When there is more than one argument, they are separated by commas. If -there are no arguments, write just @samp{()} after the function name. 
- -@strong{Do not put any space between the function name and the -open--parenthesis!} A user--defined function name looks just like the name of -a variable, and space would make the expression look like concatenation -of a variable with an expression inside parentheses. Space before the -parenthesis is harmless with built--in functions, but it is best not to get -into the habit of using space, lest you do likewise for a user--defined -function one day by mistake. - -Each function needs a particular number of arguments. For example, the -@code{sqrt} function must be called with a single argument, like this: - -@example -sqrt(@var{argument}) -@end example - -@noindent -The argument is the number to take the square root of. - -Some of the built--in functions allow you to omit the final argument. -If you do so, they will use a reasonable default. @xref{Built-in}, -for full details. If arguments are omitted in calls to user--defined -functions, then those arguments are treated as local variables, -initialized to the null string (@pxref{User-defined}). - -Like every other expression, the function call has a value, which is -computed by the function based on the arguments you give it. In this -example, the value of @code{sqrt(@var{argument})} is the square root of the -argument. A function can also have side effects, such as assigning the -values of certain variables or doing I/O. - -Here is a command to read numbers, one number per line, and print the -square root of each one: - -@example -awk '@{ print "The square root of", $1, "is", sqrt($1) @}' -@end example - -@node Statements, Arrays, Expressions, Top -@chapter Actions: Statements -@cindex Statements - -@dfn{Control statements} such as @code{if}, @code{while}, and so on -control the flow of execution in @code{awk} programs. Most of the -control statements in @code{awk} are patterned on similar statements in -C. - -The simplest kind of statement is an expression. 
The other kinds of -statements start with special keywords such as @code{if} and -@code{while}, to distinguish them from simple expressions. - -In all the examples in this chapter, @var{body} can be either a single -statement or a group of statements. Groups of statements are enclosed -in braces, and separated by newlines or semicolons.@refill - -@menu -* Expressions:: One kind of statement simply computes an expression. - -* If:: Conditionally execute some @code{awk} statements. - -* While:: Loop until some condition is satisfied. - -* Do:: Do specified action while looping until some - condition is satisfied. - -* For:: Another looping statement, that provides - initialization and increment clauses. - -* Break:: Immediately exit the innermost enclosing loop. - -* Continue:: Skip to the end of the innermost enclosing loop. - -* Next:: Stop processing the current input record. - -* Exit:: Stop execution of @code{awk}. -@end menu - -@node If, While, , Statements -@section The @code{if} Statement - -@cindex @code{if} statement -The @code{if}-@code{else} statement is @code{awk}'s decision--making -statement. The @code{else} part of the statement is optional.@refill - -@display -@code{if (@var{condition}) @var{body1} else @var{body2}} -@end display - -@noindent -Here @var{condition} is an expression that controls what the rest of the -statement will do. If @var{condition} is true, @var{body1} is executed; -otherwise, @var{body2} is executed (assuming that the @code{else} clause -is present). The condition is considered true if it is nonzero or -nonnull. 
- -Here is an example: - -@example -awk '@{ if (x % 2 == 0) - print "x is even" - else - print "x is odd" @}' -@end example - -In this example, if the expression @code{x % 2 == 0} is found to be true -(that is, x is divisible by 2), then the first @code{print} statement is -executed, otherwise the second @code{print} statement is performed.@refill - -If the @code{else} appears on the same line as @var{body1}, and @var{body1} -is a single statement, then a semicolon must separate @var{body1} from -@code{else}. To illustrate this, let's rewrite the previous example: - -@group -@example -awk '@{ if (x % 2 == 0) print "x is even"; else - print "x is odd" @}' -@end example -@end group - -@noindent -If you forget the @samp{;}, @code{awk} won't be able to parse it, and -you will get a syntax error. - -We would not actually write this example this way, because a human -reader might fail to see the @code{else} if it were not the first thing -on its line. - -@node While, Do, If, Statements -@section The @code{while} Statement -@cindex @code{while} statement -@cindex Loop -@cindex Body of a loop - -In programming, a loop means a part of a program that is (or at least can -be) executed two or more times in succession. - -The @code{while} statement is the simplest looping statement in -@code{awk}. It repeatedly executes a statement as long as a condition is -true. It looks like this: - -@example -while (@var{condition}) - @var{body} -@end example - -@noindent -Here @var{body} is a statement that we call the @dfn{body} of the loop, -and @var{condition} is an expression that controls how long the loop -keeps running. - -The first thing the @code{while} statement does is test @var{condition}. -If @var{condition} is true, it executes the statement @var{body}. After -@var{body} has been executed, @var{condition} is tested again and this -process is repeated until @var{condition} is no longer true. 
If -@var{condition} is initially false, the body of the loop is never -executed.@refill - -@example -awk '@{ i = 1 - while (i <= 3) @{ - print $i - i++ - @} -@}' -@end example - -@noindent -This example prints the first three input fields, one per line. - -The loop works like this: first, the value of @code{i} is set to 1. -Then, the @code{while} tests whether @code{i} is less than or equal to -three. This is the case when @code{i} equals one, so the @code{i}-th -field is printed. Then the @code{i++} increments the value of @code{i} -and the loop repeats. - -When @code{i} reaches 4, the loop exits. Here @var{body} is a compound -statement enclosed in braces. As you can see, a newline is not required -between the condition and the body; but using one makes the program clearer -unless the body is a compound statement or is very simple. - -@node Do, For, While, Statements -@section The @code{do}--@code{while} Statement - -The @code{do} loop is a variation of the @code{while} looping statement. -The @code{do} loop executes the @var{body} once, then repeats @var{body} -as long as @var{condition} is true. It looks like this: - -@group -@example -do - @var{body} -while (@var{condition}) -@end example -@end group - -Even if @var{condition} is false at the start, @var{body} is executed at -least once (and only once, unless executing @var{body} makes -@var{condition} true). Contrast this with the corresponding -@code{while} statement: - -@example -while (@var{condition}) - @var{body} -@end example - -@noindent -This statement will not execute @var{body} even once if @var{condition} -is false to begin with. - -Here is an example of a @code{do} statement: - -@example -awk '@{ i = 1 - do @{ - print $0 - i++ - @} while (i <= 10) -@}' -@end example - -@noindent -prints each input record ten times. It isn't a very -realistic example, since in this case an ordinary @code{while} would do -just as well. 
But this is normal; there is only occasionally a real -use for a @code{do} statement.@refill - -@node For, Break, Do, Statements -@section The @code{for} Statement -@cindex @code{for} statement - -The @code{for} statement makes it more convenient to count iterations of a -loop. The general form of the @code{for} statement looks like this:@refill - -@example -for (@var{initialization}; @var{condition}; @var{increment}) - @var{body} -@end example - -@noindent -This statement starts by executing @var{initialization}. Then, as long -as @var{condition} is true, it repeatedly executes @var{body} and then -@var{increment}. Typically @var{initialization} sets a variable to -either zero or one, @var{increment} adds 1 to it, and @var{condition} -compares it against the desired number of iterations. - -Here is an example of a @code{for} statement: - -@example -awk '@{ for (i = 1; i <= 3; i++) - print $i -@}' -@end example - -@noindent -This prints the first three fields of each input record, one field per -line. - -In the @code{for} statement, @var{body} stands for any statement, but -@var{initialization}, @var{condition} and @var{increment} are just -expressions. You cannot set more than one variable in the -@var{initialization} part unless you use a multiple assignment statement -such as @code{x = y = 0}, which is possible only if all the initial values -are equal. (But you can initialize additional variables by writing -their assignments as separate statements preceding the @code{for} loop.) - -The same is true of the @var{increment} part; to increment additional -variables, you must write separate statements at the end of the loop. -The C compound expression, using C's comma operator, would be useful in -this context, but it is not supported in @code{awk}. - -Most often, @var{increment} is an increment expression, as in the -example above. But this is not required; it can be any expression -whatever. 
For example, this statement prints odd numbers from 1 to 100: - -@example -# print odd numbers from 1 to 100 -for (i = 1; i <= 100; i += 2) - print i -@end example - -Any of the three expressions following @code{for} may be omitted if you -don't want it to do anything. Thus, @w{@samp{for (;x > 0;)}} is equivalent -to @w{@samp{while (x > 0)}}. -If the @var{condition} part is empty, it is treated as @var{true}, -effectively yielding an infinite loop.@refill - -In most cases, a @code{for} loop is an abbreviation for a @code{while} -loop, as shown here: - -@example -@var{initialization} -while (@var{condition}) @{ - @var{body} - @var{increment} -@} -@end example - -@noindent -(The only exception is when the @code{continue} statement -(@pxref{Continue}) is used inside the loop; changing a @code{for} statement -to a @code{while} statement in this way can change the effect of the -@code{continue} statement inside the loop.)@refill - -The @code{awk} language has a @code{for} statement in addition to a -@code{while} statement because often a @code{for} loop is both less work to -type and more natural to think of. Counting the number of iterations is -very common in loops. It can be easier to think of this counting as part -of looping rather than as something to do inside the loop. - -The next section has more complicated examples of @code{for} loops. - -There is an alternate version of the @code{for} loop, for iterating over -all the indices of an array: - -@example -for (i in array) - @var{process} array[i] -@end example - -@noindent -@xref{Arrays}, for more information on this version of the @code{for} loop. - -@node Break, Continue, For, Statements -@section The @code{break} Statement -@cindex @code{break} statement -@cindex Loops, breaking out of - -The @code{break} statement jumps out of the innermost @code{for}, @code{while}, -or @code{do}--@code{while} loop that encloses it. 
-The following example finds the -smallest divisor of any number, and also identifies prime numbers:@refill - -@example -awk '# find smallest divisor of num - @{ num = $1 - for (div = 2; div*div <= num; div++) - if (num % div == 0) - break - if (num % div == 0) - printf "Smallest divisor of %d is %d\n", num, div - else - printf "%d is prime\n", num @}' -@end example - -When the remainder is zero in the first @code{if} statement, @code{awk} -immediately @dfn{breaks} out of the containing @code{for} loop. This means -that @code{awk} proceeds immediately to the statement following the loop -and continues processing. (This is very different from the @code{exit} -statement (@pxref{Exit}) which stops the entire @code{awk} -program.)@refill - -Here is another program equivalent to the previous one. It illustrates how -the @var{condition} of a @code{for} or @code{while} could just as well be -replaced with a @code{break} inside an @code{if}: - -@example -awk '# find smallest divisor of num - @{ num = $1 - for (div = 2; ; div++) @{ - if (num % div == 0) @{ - printf "Smallest divisor of %d is %d\n", num, div - break - @} - if (div*div > num) @{ - printf "%d is prime\n", num - break - @} - @} -@}' -@end example - -@node Continue, Next, Break, Statements -@section The @code{continue} Statement - -@cindex @code{continue} statement -The @code{continue} statement, like @code{break}, is used only inside -@code{for}, @code{while}, and @code{do}--@code{while} loops. It skips -over the rest of the loop body, causing the next cycle around the loop -to begin immediately. Contrast this with @code{break}, which jumps out -of the loop altogether. 
Here is an example:@refill - -@example -# print names that don't contain the string "ignore" - -# first, save the text of each line -@{ names[NR] = $0 @} - -# print what we're interested in -END @{ - for (x in names) @{ - if (names[x] ~ /ignore/) - continue - print names[x] - @} -@} -@end example - -If any of the input records contain the string @samp{ignore}, this example -skips the print statement and continues back to the first statement in the -loop. - -This isn't a practical example of @code{continue}, since it would be -just as easy to write the loop like this: - -@example -for (x in names) - if (names[x] !~ /ignore/) - print names[x] -@end example - -The @code{continue} statement causes @code{awk} to skip the rest of what is -inside a @code{for} loop, but it resumes execution with the increment part -of the @code{for} loop. The following program illustrates this fact:@refill - -@example -awk 'BEGIN @{ - for (x = 0; x <= 20; x++) @{ - if (x == 5) - continue - printf ("%d ", x) - @} - print "" -@}' -@end example - -@noindent -This program prints all the numbers from 0 to 20, except for 5, for -which the @code{printf} is skipped. Since the increment @code{x++} -is not skipped, @code{x} does not remain stuck at 5. - -@node Next, Exit, Continue, Statements -@section The @code{next} Statement -@cindex @code{next} statement - -The @code{next} statement forces @code{awk} to immediately stop processing -the current record and go on to the next record. This means that no -further rules are executed for the current record. The rest of the -current rule's action is not executed either. - -Contrast this with the effect of the @code{getline} function -(@pxref{Getline}). That too causes @code{awk} to read the next record -immediately, but it does not alter the flow of control in any way. So -the rest of the current action executes with a new input record.
- -At the grossest level, @code{awk} program execution is a loop that reads -an input record and then tests each rule pattern against it. If you -think of this loop as a @code{for} statement whose body contains the -rules, then the @code{next} statement is analogous to a @code{continue} -statement: it skips to the end of the body of the loop, and executes the -increment (which reads another record). - -For example, if your @code{awk} program works only on records with four -fields, and you don't want it to fail when given bad input, you might use -the following rule near the beginning of the program: - -@example -NF != 4 @{ - printf ("line %d skipped: doesn't have 4 fields", FNR) > "/dev/tty" - next -@} -@end example - -@noindent -so that the following rules will not see the bad record. The error message -is redirected to @file{/dev/tty} (the terminal), so that it won't get lost -amid the rest of the program's regular output. - -@node Exit, , Next, Statements -@section The @code{exit} Statement - -@cindex @code{exit} statement -The @code{exit} statement causes @code{awk} to immediately stop -executing the current rule and to stop processing input; any remaining input -is ignored.@refill - -If an @code{exit} statement is executed from a @code{BEGIN} rule, -the program stops processing everything immediately. -No input records will be read. However, if an @code{END} rule is -present, it will be executed (@pxref{BEGIN/END}).@refill - -If @code{exit} is used as part of an @code{END} rule, it causes -the program to stop immediately. - -An @code{exit} statement that is part of an ordinary rule (that is, not part -of a @code{BEGIN} or @code{END} rule) stops the execution of any further -automatic rules, but the @code{END} rule is executed if there is one. -If you don't want the @code{END} rule to do its job in this case, you -can set a variable to nonzero before the @code{exit} statement, and check -that variable in the @code{END} rule.
- -If an argument is supplied to @code{exit}, its value is used as the exit -status code for the @code{awk} process. If no argument is supplied, -@code{exit} returns status zero (success).@refill - -For example, let's say you've discovered an error condition you really -don't know how to handle. Conventionally, programs report this by -exiting with a nonzero status. Your @code{awk} program can do this -using an @code{exit} statement with a nonzero argument. Here's an -example of this:@refill - -@example -BEGIN @{ - if (("date" | getline date_now) < 0) @{ - print "Can't get system date" - exit 4 - @} -@} -@end example - -@node Arrays, Built-in, Statements, Top -@chapter Actions: Using Arrays in @code{awk} - -An @dfn{array} is a table of various values, called @dfn{elements}. The -elements of an array are distinguished by their @dfn{indices}. Names -of arrays in @code{awk} are strings of alphanumeric characters and -underscores, just like regular variables. - -You cannot use the same identifier as both a variable and as an array -name in one @code{awk} program. - -@menu -* Intro: Array Intro. Basic facts about arrays in @code{awk}. -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Example: Array Example. Sample program explained. - -* Scanning an Array:: A variation of the @code{for} statement. It loops - through the indices of an array's existing elements. - -* Delete:: The @code{delete} statement removes an element from an array. - -* Multi-dimensional:: Emulating multi--dimensional arrays in @code{awk}. -* Multi-scanning:: Scanning multi--dimensional arrays. -@end menu - -@node Array Intro, Reference to Elements, , Arrays -@section Introduction to Arrays - -@cindex Arrays -The @code{awk} language has one--dimensional @dfn{arrays} for storing groups -of related strings or numbers.
Each array must have a name; valid array -names are the same as valid variable names, and they do conflict with -variable names: you can't have both an array and a variable with the same -name at any point in an @code{awk} program. - -Arrays in @code{awk} superficially resemble arrays in other programming -languages; but there are fundamental differences. In @code{awk}, you -don't need to declare the size of an array before you start to use it. -What's more, in @code{awk} any number or even a string may be used as an -array index. - -In most other languages, you have to @dfn{declare} an array and specify -how many elements or components it has. In such languages, the -declaration causes a contiguous block of memory to be allocated for that -many elements. An index in the array must be a positive integer; for -example, the index 0 specifies the first element in the array, which is -actually stored at the beginning of the block of memory. Index 1 -specifies the second element, which is stored in memory right after the -first element, and so on. It is impossible to add more elements to the -array, because it has room for only as many elements as you declared. -(Some languages have arrays whose first index is 1, others require that -you specify both the first and last index when you declare the array. -In such a language, an array could be indexed, for example, from -3 to -17.) A contiguous array of four elements might look like this, -conceptually, if the element values are 8, @code{"foo"}, @code{""} and -30:@refill - -@example -+---------+---------+--------+---------+ -| 8 | "foo" | "" | 30 | @r{value} -+---------+---------+--------+---------+ - 0 1 2 3 @r{index} -@end example - -@noindent -Only the values are stored; the indices are implicit from the order of -the values. 8 is the value at index 0, because 8 appears in the -position with 0 elements before it. 
- -@cindex Arrays, definition of -@cindex Associative arrays -Arrays in @code{awk} are different: they are @dfn{associative}. This means -that each array is a collection of pairs: an index, and its corresponding -array element value: - -@example -@r{Element} 4 @r{Value} 30 -@r{Element} 2 @r{Value} "foo" -@r{Element} 1 @r{Value} 8 -@r{Element} 3 @r{Value} "" -@end example - -@noindent -We have shown the pairs in jumbled order because their order doesn't -mean anything. - -One advantage of an associative array is that new pairs can be added -at any time. For example, suppose we add to that array a tenth element -whose value is @w{@code{"number ten"}}. The result is this: - -@example -@r{Element} 10 @r{Value} "number ten" -@r{Element} 4 @r{Value} 30 -@r{Element} 2 @r{Value} "foo" -@r{Element} 1 @r{Value} 8 -@r{Element} 3 @r{Value} "" -@end example - -@noindent -Now the array is @dfn{sparse} (i.e. some indices are missing): it has -elements number 4 and 10, but doesn't have an element 5, 6, 7, 8, or -9.@refill - -Another consequence of associative arrays is that the indices don't -have to be positive integers. Any number, or even a string, can be -an index. For example, here is an array which translates words from -English into French: - -@example -@r{Element} "dog" @r{Value} "chien" -@r{Element} "cat" @r{Value} "chat" -@r{Element} "one" @r{Value} "un" -@r{Element} 1 @r{Value} "un" -@end example - -@noindent -Here we decided to translate the number 1 in both spelled--out and -numeral form---thus illustrating that a single array can have both -numbers and strings as indices. - -When @code{awk} creates an array for you, e.g. with the @code{split} -built--in function (@pxref{String Functions}), that array's indices -start at the number one. 
- -@node Reference to Elements, Assigning Elements, Array Intro, Arrays -@section Referring to an Array Element -@cindex Array reference -@cindex Element of array -@cindex Reference to array - -The principal way of using an array is to refer to one of its elements. -An array reference is an expression which looks like this: - -@example -@var{array}[@var{index}] -@end example - -@noindent -Here @var{array} is the name of an array. The expression @var{index} is -the index of the element of the array that you want. The value of the -array reference is the current value of that array element. - -For example, @samp{foo[4.3]} is an expression for the element of array -@code{foo} at index 4.3. - -If you refer to an array element that has no recorded value, the value -of the reference is @code{""}, the null string. This includes elements -to which you have not assigned any value, and elements that have been -deleted (@pxref{Delete}). Such a reference automatically creates that -array element, with the null string as its value. (In some cases, -this is unfortunate, because it might waste memory inside @code{awk}). - -@cindex Arrays, determining presence of elements -You can find out if an element exists in an array at a certain index with -the expression: - -@example -@var{index} in @var{array} -@end example - -@noindent -This expression tests whether or not the particular index exists, -without the side effect of creating that element if it is not present. -The expression has the value 1 (true) if -@code{@var{array}[@var{subscript}]} exists, and 0 (false) if it does not -exist.@refill - -For example, to find out whether the array @code{frequencies} contains the -subscript @code{"2"}, you would ask:@refill - -@example -if ("2" in frequencies) print "Subscript \"2\" is present." -@end example - -Note that this is @emph{not} a test of whether or not the array -@code{frequencies} contains an element whose @emph{value} is @code{"2"}. 
-(There is no way to do that except to scan all the elements.) Also, this -@emph{does not} create @code{frequencies["2"]}, while the following -(incorrect) alternative would:@refill - -@example -if (frequencies["2"] != "") print "Subscript \"2\" is present." -@end example - -@node Assigning Elements, Array Example, Reference to Elements, Arrays -@section Assigning Array Elements -@cindex Array assignment -@cindex Element assignment - -Array elements are lvalues: they can be assigned values just like -@code{awk} variables: - -@example -@var{array}[@var{subscript}] = @var{value} -@end example - -@noindent -Here @var{array} is the name of your array. The expression -@var{subscript} is the index of the element of the array that you want -to assign a value. The expression @var{value} is the value you are -assigning to that element of the array.@refill - -@node Array Example, Scanning an Array, Assigning Elements, Arrays -@section Basic Example of an Array - -The following program takes a list of lines, each beginning with a line -number, and prints them out in order of line number. The line numbers are -not in order, however, when they are first read: they are scrambled. This -program sorts the lines by making an array using the line numbers as -subscripts. It then prints out the lines in sorted order of their numbers. -It is a very simple program, and will get confused if it encounters repeated -numbers, gaps, or lines that don't begin with a number.@refill - -@example -BEGIN @{ - max=0 -@} - -@{ - if ($1 > max) - max = $1 - arr[$1] = $0 -@} - -END @{ - for (x = 1; x <= max; x++) - print arr[x] -@} -@end example - -The first rule just initializes the variable @code{max}. (This is not -strictly necessary, since an uninitialized variable has the null string -as its value, and the null string is effectively zero when used in -a context where a number is required.)
- -The second rule keeps track of the largest line number seen so far; -it also stores each line into the array @code{arr}, at an index that -is the line's number. - -The third rule runs after all the input has been read, to print out -all the lines. - -When this program is run with the following input: - -@example -5 I am the Five man -2 Who are you? The new number two! -4 . . . And four on the floor -1 Who is number one? -3 I three you. -@end example - -@noindent -its output is this: - -@example -1 Who is number one? -2 Who are you? The new number two! -3 I three you. -4 . . . And four on the floor -5 I am the Five man -@end example - -@node Scanning an Array, Delete, Array Example, Arrays -@section Scanning All Elements of an Array -@cindex @code{for (x in @dots{})} -@cindex Arrays, special @code{for} statement -@cindex Scanning an array - -In programs that use arrays, often you need a loop that will execute -once for each element of an array. In other languages, where arrays are -contiguous and indices are limited to positive integers, this is -easy: the largest index is one less than the length of the array, and you can -find all the valid indices by counting from zero up to that value. This -technique won't do the job in @code{awk}, since any number or string -may be an array index. So @code{awk} has a special kind of @code{for} -statement for scanning an array: - -@example -for (@var{var} in @var{array}) - @var{body} -@end example - -@noindent -This loop executes @var{body} once for each different value that your -program has previously used as an index in @var{array}, with the -variable @var{var} set to that index.@refill - -Here is a program that uses this form of the @code{for} statement. The -first rule scans the input records and notes which words appear (at -least once) in the input, by storing a 1 into the array @code{used} with -the word as index. 
The second rule scans the elements of @code{used} to -find all the distinct words that appear in the input. It prints each -word that is more than 10 characters long, and also prints the number of -such words. @xref{Built-in}, for more information on the built--in -function @code{length}. - -@example -# Record a 1 for each word that is used at least once. -@{ - for (i = 1; i <= NF; i++) - used[$i] = 1 -@} - -# Find number of distinct words more than 10 characters long. -END @{ - num_long_words = 0 - for (x in used) - if (length(x) > 10) @{ - ++num_long_words - print x - @} - print num_long_words, "words longer than 10 characters" -@} -@end example - -@noindent -@xref{Sample Program}, for a more detailed example of this type. - -The order in which elements of the array are accessed by this statement -is determined by the internal arrangement of the array elements within -@code{awk} and cannot be controlled or changed. This can lead to -problems if new elements are added to @var{array} by statements in -@var{body}; you cannot predict whether or not the @code{for} loop will -reach them. Similarly, changing @var{var} inside the loop can produce -strange results. It is best to avoid such things.@refill - -@node Delete, Multi-dimensional, Scanning an Array, Arrays -@section The @code{delete} Statement -@cindex @code{delete} statement -@cindex Deleting elements of arrays -@cindex Removing elements of arrays -@cindex Arrays, deleting an element - -You can remove an individual element of an array using the @code{delete} -statement: - -@example -delete @var{array}[@var{index}] -@end example - -When an array element is deleted, it is as if you had never referred to it -and had never given it any value. Any value the element formerly had -can no longer be obtained.
- -Here is an example of deleting elements in an array: - -@example -awk '@{ for (i in frequencies) - delete frequencies[i] -@}' -@end example - -@noindent -This example removes all the elements from the array @code{frequencies}. - -If you delete an element, the @code{for} statement to scan the array -will not report that element, and the @code{in} operator to check for -the presence of that element will return 0: - -@example -delete foo[4] -if (4 in foo) - print "This will never be printed" -@end example - -@node Multi-dimensional, Multi-scanning, Delete, Arrays -@section Multi--dimensional arrays - -@cindex Subscripts, multi-dimensional in arrays -@cindex Arrays, multi-dimensional subscripts -A multi--dimensional array is an array in which an element is identified -by a sequence of indices, not a single index. For example, a -two--dimensional array requires two indices. The usual way (in most -languages, including @code{awk}) to refer to an element of a -two--dimensional array named @code{grid} is with @code{grid[x,y]}. - -@vindex SUBSEP -Multi--dimensional arrays are supported in @code{awk} through -concatenation of indices into one string. What happens is that -@code{awk} converts the indices into strings (@pxref{Conversion}) and -concatenates them together, with a separator between them. This creates -a single string that describes the values of the separate indices. The -combined string is used as a single index into an ordinary, -one--dimensional array. The separator used is the value of the special -variable @code{SUBSEP}. - -For example, suppose the value of @code{SUBSEP} is @code{","} and the -expression @samp{foo[5,12]="value"} is executed. The numbers 5 and 12 -will be concatenated with a comma between them, yielding @code{"5,12"}; -thus, the array element @code{foo["5,12"]} will be set to -@code{"value"}. - -Once the element's value is stored, @code{awk} has no record of whether -it was stored with a single index or a sequence of indices. 
The two -expressions @code{foo[5,12]} and @w{@code{foo[5 SUBSEP 12]}} always have -the same value. - -The default value of @code{SUBSEP} is not a comma; it is the string -@code{"\034"}, which contains a nonprinting character that is unlikely -to appear in an @code{awk} program or in the input data. - -The usefulness of choosing an unlikely character comes from the fact -that index values that contain a string matching @code{SUBSEP} lead to -combined strings that are ambiguous. Suppose that @code{SUBSEP} is a -comma; then @w{@code{foo["a,b", "c"]}} and @w{@code{foo["a", "b,c"]}} will be -indistinguishable because both are actually stored as -@code{foo["a,b,c"]}. Because @code{SUBSEP} is @code{"\034"}, such -confusion can actually happen only when an index contains the character -@code{"\034"}, which is a rare event. - -You can test whether a particular index--sequence exists in a -``multi--dimensional'' array with the same operator @code{in} used for single -dimensional arrays. Instead of a single index as the left--hand operand, -write the whole sequence of indices, separated by commas, in -parentheses:@refill - -@example -(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array} -@end example - -The following example treats its input as a two--dimensional array of -fields; it rotates this array 90 degrees clockwise and prints the -result. It assumes that all lines have the same number of -elements. 
- -@example -awk 'BEGIN @{ - max_nf = max_nr = 0 -@} - -@{ - if (max_nf < NF) - max_nf = NF - max_nr = NR - for (x = 1; x <= NF; x++) - vector[x, NR] = $x -@} - -END @{ - for (x = 1; x <= max_nf; x++) @{ - for (y = max_nr; y >= 1; --y) - printf("%s ", vector[x, y]) - printf("\n") - @} -@}' -@end example - -@noindent -When given the input: - -@example -1 2 3 4 5 6 -2 3 4 5 6 1 -3 4 5 6 1 2 -4 5 6 1 2 3 -@end example - -@noindent -it produces: - -@example -4 3 2 1 -5 4 3 2 -6 5 4 3 -1 6 5 4 -2 1 6 5 -3 2 1 6 -@end example - -@node Multi-scanning, , Multi-dimensional, Arrays -@section Scanning Multi--dimensional Arrays - -There is no special @code{for} statement for scanning a -``multi--dimensional'' array; there cannot be one, because in truth there -are no multi--dimensional arrays or elements; there is only a -multi--dimensional @emph{way of accessing} an array. - -However, if your program has an array that is always accessed as -multi--dimensional, you can get the effect of scanning it by combining -the scanning @code{for} statement (@pxref{Scanning an Array}) with the -@code{split} built--in function (@pxref{String Functions}). It works -like this: - -@example -for (combined in @var{array}) @{ - split (combined, separate, SUBSEP) - @dots{} -@} -@end example - -@noindent -This finds each concatenated, combined index in the array, and splits it -into the individual indices by breaking it apart where the value of -@code{SUBSEP} appears. The split--out indices become the elements of -the array @code{separate}. - -Thus, suppose you have previously stored in @code{@var{array}[1, -"foo"]}; then an element with index @code{"1\034foo"} exists in -@var{array}. (Recall that the default value of @code{SUBSEP} contains -the character with code 034.) Sooner or later the @code{for} statement -will find that index and do an iteration with @code{combined} set to -@code{"1\034foo"}. 
Then the @code{split} function will be called as -follows: - -@example -split ("1\034foo", separate, "\034") -@end example - -@noindent -The result of this is to set @code{separate[1]} to 1 and @code{separate[2]} -to @code{"foo"}. Presto, the original sequence of separate indices has -been recovered. - -@node Built-in, User-defined, Arrays, Top -@chapter Built--in functions - -@cindex Built-in functions, list of -@dfn{Built--in} functions are functions always available for your -@code{awk} program to call. This chapter defines all the built--in -functions that exist; some of them are mentioned in other sections, but -they are summarized here for your convenience. (You can also define -new functions yourself. @xref{User-defined}.) - -In most cases, any extra arguments given to built--in functions are ignored. -The defaults for omitted arguments vary from function to function and are -described under the individual functions. - -The name of a built--in function need not be followed immediately by -the opening left parenthesis of the arguments; whitespace is allowed. -However, it is wise to write no space there, since user--defined -functions do not allow space. - -When a function is called, expressions that create the function's actual -parameters are evaluated completely before the function call is performed. -For example, in the code fragment: - -@example -i = 4 -j = myfunc(i++) -@end example - -@noindent -the variable @code{i} will be set to 5 before @code{myfunc} is called -with a value of 4 for its actual parameter. - -@menu -* Numeric Functions:: Functions that work with numbers, - including @code{int}, @code{sin} and @code{rand}. - -* String Functions:: Functions for string manipulation, - such as @code{split}, @code{match}, and @code{sprintf}. 
- -* I/O Functions:: Functions for files and shell commands -@end menu - -@node Numeric Functions, String Functions, , Built-in -@section Numeric Built--in Functions - -The general syntax of the numeric built--in functions is the same for -each. Here is an example of that syntax:@refill - -@example -awk '# Read input records containing a pair of points: x0, y0, x1, y1. - # Print the points and the distance between them. - @{ printf "%f %f %f %f %f\n", $1, $2, $3, $4, - sqrt(($3-$1) * ($3-$1) + ($4-$2) * ($4-$2)) @}' -@end example - -@noindent -This calculates the square root of a calculation that uses the values -of the fields. It then prints the first four fields of the input -record and the result of the square root calculation. - -Here is the full list of numeric built--in functions: - -@table @code -@item int(@var{x}) -This gives you the integer part of @var{x}, truncated toward 0. This -produces the nearest integer to @var{x}, located between @var{x} and 0. - -For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)} -is -3, and @code{int(-3)} is -3 as well.@refill - -@item sqrt(@var{x}) -This gives you the positive square root of @var{x}. It reports an error -if @var{x} is negative.@refill - -@item exp(@var{x}) -This gives you the exponential of @var{x}, or reports an error if @var{x} is -out of range. The range of values @var{x} can have depends on your -machine's floating point representation.@refill - -@item log(@var{x}) -This gives you the natural logarithm of @var{x}, if @var{x} is positive; -otherwise, it reports an error.@refill - -@item sin(@var{x}) -This gives you the sine of @var{x}, with @var{x} in radians. - -@item cos(@var{x}) -This gives you the cosine of @var{x}, with @var{x} in radians. - -@item atan2(@var{y}, @var{x}) -This gives you the arctangent of @var{y/x}, with the result in radians. - -@item rand() -This gives you a random number. The values of @w{@code{rand()}} are -uniformly--distributed between 0 and 1.
The value is never 0 and never -1. - -Often you want random integers instead. Here is a user--defined function -you can use to obtain a random nonnegative integer less than @var{n}: - -@example -function randint(n) @{ - return int(n * rand()) -@} -@end example - -@noindent -The multiplication produces a random real number at least 0, and less -than @var{n}. We then make it an integer (using @code{int}) between 0 -and @code{@var{n}@minus{}1}. - -Here is an example where a similar function is used to produce -random integers between 1 and @var{n}: - -@example -awk ' -# Function to roll a simulated die. -function roll(n) @{ return 1 + int(rand() * n) @} - -# Roll 3 six--sided dice and print total number of points. -@{ - printf("%d points\n", roll(6)+roll(6)+roll(6)) -@}' -@end example - -@emph{Note} that @w{@code{rand()}} starts generating numbers from the same -point, or @dfn{seed}, each time you run @code{awk}. This means that -the same program will produce the same results each time you run it. -The numbers are random within one @code{awk} run, but predictable -from run to run. This is convenient for debugging, but if you want -a program to do different things each time it is used, you must change -the seed to a value that will be different in each run. To do this, -use @code{srand}. - -@item srand(@var{x}) -The function @code{srand(@var{x})} sets the starting point, or @dfn{seed}, -for generating random numbers to the value @var{x}. - -Each seed value leads to a particular sequence of ``random'' numbers. -Thus, if you set the seed to the same value a second time, you will get -the same sequence of ``random'' numbers again. - -If you omit the argument @var{x}, as in @code{srand()}, then the current -date and time of day are used for a seed. This is the way to get random -numbers that are truly unpredictable. - -The return value of @code{srand()} is the previous seed. 
This makes it -easy to keep track of the seeds for use in consistently reproducing -sequences of random numbers. -@end table - -@node String Functions, I/O Functions, Numeric Functions, Built-in -@section Built--in Functions for String Manipulation - -@table @code -@item index(@var{in}, @var{find}) -@findex index -This searches the string @var{in} for the first occurrence of the string -@var{find}, and returns the position where that occurrence begins in the -string @var{in}. For example:@refill - -@example -awk 'BEGIN @{ print index("peanut", "an") @}' -@end example - -@noindent -prints @samp{3}. If @var{find} is not found, @code{index} returns 0. - -@item length(@var{string}) -@findex length -This gives you the number of characters in @var{string}. If -@var{string} is a number, the length of the digit string representing -that number is returned. For example, @code{length("abcde")} is 5. -Whereas, @code{length(15 * 35)} works out to 3. How? Well, 15 * 35 = -525, and 525 is then converted to the string @samp{"525"}, which has -three characters. - -@item match(@var{string}, @var{regexp}) -@findex match -The @code{match} function searches the string, @var{string}, for the -longest, leftmost substring matched by the regular expression, -@var{regexp}. It returns the character position, or @dfn{index}, of -where that substring begins (1, if it starts at the beginning of -@var{string}). If no match is found, it returns 0. - -@vindex RSTART -@vindex RLENGTH -The @code{match} function sets the special variable @code{RSTART} to -the index. It also sets the special variable @code{RLENGTH} to the -length of the matched substring. If no match is found, @code{RSTART} -is set to 0, and @code{RLENGTH} to -1. 
- -For example: - -@example -awk '@{ - if ($1 == "FIND") - regex = $2 - else @{ - where = match($0, regex) - if (where) - print "Match of", regex, "found at", where, "in", $0 - @} -@}' -@end example - -@noindent -This program looks for lines that match the regular expression stored in -the variable @code{regex}. This regular expression can be changed. If the -first word on a line is @samp{FIND}, @code{regex} is changed to be the -second word on that line. Therefore, given: - -@example -FIND fo*bar -My program was a foobar -But none of it would doobar -FIND Melvin -JF+KM -This line is property of The Reality Engineering Co. -This file was created by Melvin. -@end example - -@noindent -@code{awk} prints: - -@example -Match of fo*bar found at 18 in My program was a foobar -Match of Melvin found at 26 in This file was created by Melvin. -@end example - -@item split(@var{string}, @var{array}, @var{field_separator}) -@findex split -This divides @var{string} up into pieces separated by -@var{field_separator}, and stores the pieces in @var{array}. The -first piece is stored in @code{@var{array}[1]}, the second piece in -@code{@var{array}[2]}, and so forth. The string value of the third -argument, @var{field_separator}, is used as a regexp to search for to -find the places to split @var{string}. If the @var{field_separator} -is omitted, the value of @code{FS} is used. @code{split} returns the -number of elements created.@refill - -The @code{split} function, then, splits strings into pieces in a -manner similar to the way input lines are split into fields. For example: - -@example -split("auto-da-fe", a, "-") -@end example - -@noindent -splits the string @samp{auto-da-fe} into three fields using @samp{-} as the -separator. It sets the contents of the array @code{a} as follows: - -@example -a[1] = "auto" -a[2] = "da" -a[3] = "fe" -@end example - -@noindent -The value returned by this call to @code{split} is 3. 
- -@item sprintf(@var{format}, @var{expression1},@dots{}) -@findex sprintf -This returns (without printing) the string that @code{printf} would -have printed out with the same arguments (@pxref{Printf}). For -example: - -@example -sprintf("pi = %.2f (approx.)", 22/7) -@end example - -@noindent -returns the string @w{@code{"pi = 3.14 (approx.)"}}. - -@item sub(@var{regexp}, @var{replacement_string}, @var{target_variable}) -@findex sub -The @code{sub} function alters the value of @var{target_variable}. -It searches this value, which should be a string, for the -leftmost substring matched by the regular expression, @var{regexp}, -extending this match as far as possible. Then the entire string is -changed by replacing the matched text with @var{replacement_string}. -The modified string becomes the new value of @var{target_variable}. - -This function is peculiar because @var{target_variable} is not simply -used to compute a value, and not just any expression will do: it -must be a variable, field or array reference, so that @code{sub} can -store a modified value there. If this argument is omitted, then the -default is to use and alter @code{$0}. - -For example:@refill - -@example -str = "water, water, everywhere" -sub(/at/, "ith", str) -@end example - -@noindent -sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the -leftmost, longest occurrence of @samp{at} with @samp{ith}. - -The @code{sub} function returns the number of substitutions made (either -one or zero). - -The special character, @samp{&}, in the replacement string, -@var{replacement_string}, stands for the precise substring that was -matched by @var{regexp}. (If the regexp can match more than one string, -then this precise substring may vary.) For example:@refill - -@example -awk '@{ sub(/candidate/, "& and his wife"); print @}' -@end example - -@noindent -will change the first occurrence of ``candidate'' to ``candidate and -his wife'' on each input line. 
- -@noindent -The effect of this special character can be turned off by preceding -it with a backslash (@samp{\&}). To include a backslash in the -replacement string, it too must be preceded with a (second) backslash. - -Note: if you use @code{sub} with a third argument that is not a variable, -field or array element reference, then it will still search for the pattern -and return 0 or 1, but the modified string is thrown away because there -is no place to put it. For example: - -@example -sub(/USA/, "United States", "the USA and Canada") -@end example - -will indeed produce a string @w{@code{"the United States and Canada"}}, -but there will be no way to use that string! - -@item gsub(@var{regexp}, @var{replacement_string}, @var{target_variable}) -@findex gsub -This is similar to the @code{sub} function, except @code{gsub} replaces -@emph{all} of the longest, leftmost, @emph{non--overlapping} matching -substrings it can find. The ``g'' in @code{gsub} stands for @dfn{global}, -which means replace @emph{everywhere}. For example:@refill - -@example -awk '@{ gsub(/Britain/, "United Kingdom"); print @}' -@end example - -@noindent -replaces all occurrences of the string @samp{Britain} with @samp{United -Kingdom} for all input records.@refill - -The @code{gsub} function returns the number of substitutions made. If -the variable to be searched and altered, @var{target_variable}, is -omitted, then the entire input record, @code{$0}, is used.@refill - -The characters @samp{&} and @samp{\} are special in @code{gsub} -as they are in @code{sub} (see immediately above). - -@item substr(@var{string}, @var{start}, @var{length}) -@findex substr -This returns a @var{length}--character--long substring of @var{string}, -starting at character number @var{start}. The first character of a -string is character number one. 
For example, -@code{substr("washington", 5, 3)} returns @samp{"ing"}.@refill - -If @var{length} is not present, this function returns the whole suffix of -@var{string} that begins at character number @var{start}. For example, -@code{substr("washington", 5)} returns @samp{"ington"}. -@end table - -@node I/O Functions, , String Functions, Built-in -@section Built--in Functions for I/O to Files and Commands - -@table @code -@item close(@var{filename}) -Close the file @var{filename}. The argument may alternatively be -a shell command that was used for redirecting to or from a pipe; then the -pipe is closed. - -@xref{Close Input}, regarding closing input files and pipes. -@xref{Close Output}, regarding closing output files and pipes. - -@item system(@var{command}) -@findex system -@cindex Interaction of @code{awk} with other programs -The system function allows the user to execute operating system commands and -then return to the @code{awk} program. The @code{system} function executes -the command given by the string value of @var{command}. It returns, as its -value, the status returned by the command that was executed. This is known -as returning the @dfn{exit status}. - -For example, if the following fragment of code is put in your @code{awk} -program: - -@example -END @{ - system("mail -s 'awk run done' operator < /dev/null") -@} -@end example - -@noindent -the system operator will be sent mail when the @code{awk} program -finishes processing input and begins its end--of--input processing. - -Note that much the same result can be obtained by redirecting -@code{print} or @code{printf} into a pipe. 
-However, if your @code{awk} program is interactive, this function is -useful for cranking up large self--contained programs, such as a shell -or an editor.@refill -@end table - -@node User-defined, Special, Built-in, Top -@chapter User--defined Functions - -@cindex User-defined functions -@cindex Functions, user-defined -Complicated @code{awk} programs can often be simplified by defining -your own functions. User--defined functions can be called just like -built--in ones (@pxref{Function Calls}), but it is up to you to define -them---to tell @code{awk} what they should do. - -@menu -* Definition Syntax:: How to write definitions and what they mean. -* Function Example:: An example function definition and what it does. -* Function Caveats:: Things to watch out for. -* Return Statement:: Specifying the value a function returns. -@end menu - -@node Definition Syntax, Function Example, , User-defined -@section Syntax of Function Definitions - -The definition of a function named @var{name} looks like this: - -@example -function @var{name} (@var{parameter-list}) @{ - @var{body-of-function} -@} -@end example - -A valid function name is like a valid variable name: a sequence of -letters, digits and underscores, not starting with a digit. - -Such function definitions can appear anywhere between the rules -of the @code{awk} program. The general format of an @code{awk} -program, then, is now modified to include sequences of rules @emph{and} -user--defined function definitions. - -The function definition need not precede all the uses of the function. -This is because @code{awk} reads the entire program before starting to -execute any of it. - -The @var{parameter-list} is a list of the function's @dfn{local} -variable names, separated by commas. Within the body of the function, -local variables refer to arguments with which the function is called. 
-If the function is called with fewer arguments than it has local -variables, this is not an error; the extra local variables are simply -set to the null string. - -The local variable values hide or @dfn{shadow} any variables of the same -names used in the rest of the program. The shadowed variables are not -accessible in the function definition, because there is no way to name -them while their names have been taken away for the local variables. -All other variables used in the @code{awk} program can be referenced -or set normally in the function definition. - -The local variables last only as long as the function is executing. -Once the function finishes, the shadowed variables come back. - -The @var{body-of-function} part of the definition is the most important -part, because this is what says what the function should actually @emph{do}. -The local variables exist to give the body a way to talk about the arguments. - -Functions may be @dfn{recursive}, i.e., they can call themselves, either -directly, or indirectly (via calling a second function that calls the first -again). - -The keyword @samp{function} may also be written @samp{func}. - -@node Function Example, Function Caveats, Definition Syntax, User-defined -@section Function Definition Example - -Here is an example of a user--defined function, called @code{myprint}, that -takes a number and prints it in a specific format. - -@example -function myprint(num) -@{ - printf "%6.3g\n", num -@} -@end example - -@noindent -To illustrate, let's use the following @code{awk} rule to use, or -@dfn{call}, our @code{myprint} function: - -@example -$3 > 0 @{ myprint($3) @} -@end example - -@noindent -This program prints, in our special format, all the third fields that -contain a positive number in our input. 
Therefore, when given: - -@example - 1.2 3.4 5.6 7.8 - 9.10 11.12 13.14 15.16 -17.18 19.20 21.22 23.24 -@end example - -@noindent -this program, using our function to format the results, will print: - -@example - 5.6 - 13.1 - 21.2 -@end example - -Here is a rather contrived example of a recursive function. It prints a -string backwards: - -@example -function rev (str, len) @{ - if (len == 0) @{ - printf "\n" - return - @} - printf "%c", substr(str, len, 1) - rev(str, len - 1) -@} -@end example - -@node Function Caveats, Return Statement, Function Example, User-defined -@section Caveats of Function Calling - -@emph{Note} that there cannot be any blanks between the function name and -the left parenthesis of the argument list, when calling a function. -This is so @code{awk} can tell you are not trying to concatenate the value -of a variable with the value of an expression inside the parentheses. - -When a function is called, it is given a @emph{copy} of the values of -its arguments. This is called @dfn{passing by value}. The caller may -use a variable as the expression for the argument, but the called -function does not know this: all it knows is what value the argument -had. For example, if you write this code: - -@example -foo = "bar" -z = myfunc(foo) -@end example - -@noindent -then you should not think of the argument to @code{myfunc} as being -``the variable @code{foo}''. Instead, think of the argument as the -string value, @code{"bar"}. - -If the function @code{myfunc} alters the values of its local variables, -this has no effect on any other variables. In particular, if @code{myfunc} -does this: - -@example -function myfunc (win) @{ - print win - win = "zzz" - print win -@} -@end example - -@noindent -to change its first argument variable @code{win}, this @emph{does not} -change the value of @code{foo} in the caller. The role of @code{foo} in -calling @code{myfunc} ended when its value, @code{"bar"}, was computed. 
-If @code{win} also exists outside of @code{myfunc}, this definition -will not change it---that value is shadowed during the execution of -@code{myfunc} and cannot be seen or changed from there. - -However, when arrays are the parameters to functions, they are @emph{not} -copied. Instead, the array itself is made available for direct manipulation -by the function. This is usually called @dfn{passing by reference}. -Changes made to an array parameter inside the body of a function @emph{are} -visible outside that function. @emph{This can be very dangerous if you don't -watch what you are doing.} For example:@refill - -@example -function changeit (array, ind, nvalue) @{ - array[ind] = nvalue -@} - -BEGIN @{ - a[1] = 1 ; a[2] = 2 ; a[3] = 3 - changeit(a, 2, "two") - printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3] - @} -@end example - -@noindent -will print @samp{a[1] = 1, a[2] = two, a[3] = 3}, because the call to -@code{changeit} stores @code{"two"} in the second element of @code{a}. - -@node Return Statement, , Function Caveats, User-defined -@section The @code{return} statement -@cindex @code{return} statement - -The body of a user--defined function can contain a @code{return} statement. -This statement returns control to the rest of the @code{awk} program. It -can also be used to return a value for use in the rest of the @code{awk} -program. It looks like:@refill - -@display -@code{return @var{expression}} -@end display - -The @var{expression} part is optional. If it is omitted, then the returned -value is undefined and, therefore, unpredictable. - -A @code{return} statement with no value expression is assumed at the end of -every function definition. So if control reaches the end of the function -definition, then the function returns an unpredictable value. 
- -Here is an example of a user--defined function that returns a value -for the largest number among the elements of an array:@refill - -@example -function maxelt (vec, i, ret) @{ - for (i in vec) @{ - if (ret == "" || vec[i] > ret) - ret = vec[i] - @} - return ret -@} -@end example - -@noindent -You call @code{maxelt} with one argument, an array name. The local -variables @code{i} and @code{ret} are not intended to be arguments; -while there is nothing to stop you from passing two or three arguments -to @code{maxelt}, the results would be strange. - -When writing a function definition, it is conventional to separate the -parameters from the local variables with extra spaces, as shown above -in the definition of @code{maxelt}. - -Here is a program that uses, or calls, our @code{maxelt} function. This -program loads an array, calls @code{maxelt}, and then reports the maximum -number in that array:@refill - -@example -awk ' -function maxelt (vec, i, ret) @{ - for (i in vec) @{ - if (ret == "" || vec[i] > ret) - ret = vec[i] - @} - return ret -@} - -# Load all fields of each record into nums. -@{ - for(i = 1; i <= NF; i++) - nums[NR, i] = $i -@} - -END @{ - print maxelt(nums) -@}' -@end example - -Given the following input: - -@example - 1 5 23 8 16 -44 3 5 2 8 26 -256 291 1396 2962 100 --6 467 998 1101 -99385 11 0 225 -@end example - -@noindent -our program tells us (predictably) that: - -@example -99385 -@end example - -@noindent -is the largest number in our array. - -@node Special, Sample Program , User-defined, Top -@chapter Special Variables - -Most @code{awk} variables are available for you to use for your own -purposes; they will never change except when your program assigns them, and -will never affect anything except when your program examines them. - -A few variables have special meanings. Some of them @code{awk} examines -automatically, so that they enable you to tell @code{awk} how to do -certain things. 
Others are set automatically by @code{awk}, so that they -carry information from the internal workings of @code{awk} to your program. - -Most of these variables are also documented in the chapters where their -areas of activity are described. - -@menu -* User-modified:: Special variables that you change to control @code{awk}. - -* Auto-set:: Special variables where @code{awk} gives you information. -@end menu - -@node User-modified, Auto-set, , Special -@section Special Variables That Control @code{awk} -@cindex Special variables, user modifiable - -This is a list of the variables which you can change to control how -@code{awk} does certain things. - -@table @code -@c it's unadvisable to have multiple index entries for the same name -@c since in Info there is no way to distinguish the two. -@c @vindex FS -@item FS -@code{FS} is the input field separator (@pxref{Field Separators}). -The value is a regular expression that matches the separations -between fields in an input record. - -The default value is @w{@code{" "}}, a string consisting of a single -space. As a special exception, this value actually means that any -sequence of spaces and tabs is a single separator. It also causes -spaces and tabs at the beginning or end of a line to be ignored. - -You can set the value of @code{FS} on the command line using the -@samp{-F} option: - -@example -awk -F, '@var{program}' @var{input-files} -@end example - -@item OFMT -@c @vindex OFMT -This string is used by @code{awk} to control conversion of numbers to -strings (@pxref{Conversion}). It works by being passed, in effect, as -the first argument to the @code{sprintf} function. Its default value -is @code{"%.6g"}.@refill - -@item OFS -@c @vindex OFS -This is the output field separator (@pxref{Output Separators}). It is -output between the fields output by a @code{print} statement. Its -default value is @w{@code{" "}}, a string consisting of a single space. 
- -@item ORS -@c @vindex ORS -This is the output record separator (@pxref{Output Separators}). It -is output at the end of every @code{print} statement. Its default -value is the newline character, often represented in @code{awk} -programs as @samp{\n}. - -@item RS -@c @vindex RS -This is @code{awk}'s record separator (@pxref{Records}). Its default -value is a string containing a single newline character, which means -that an input record consists of a single line of text.@refill - -@item SUBSEP -@c @vindex SUBSEP -@code{SUBSEP} is a subscript separator (@pxref{Multi-dimensional}). It -has the default value of @code{"\034"}, and is used to separate the -parts of the name of a multi--dimensional array. Thus, if you access -@code{foo[12,3]}, it really accesses @code{foo["12\0343"]}.@refill -@end table - -@node Auto-set, , User-modified, Special -@section Special Variables That Convey Information to You - -This is a list of the variables that are set automatically by @code{awk} -on certain occasions so as to provide information for your program. - -@table @code -@item ARGC -@itemx ARGV -@c @vindex ARGC -@c @vindex ARGV -The command--line arguments available to @code{awk} are stored in an -array called @code{ARGV}. @code{ARGC} is the number of command--line -arguments present. @code{ARGV} is indexed from zero to @w{@code{ARGC} - 1}. -For example: - -@example -awk '@{ print ARGV[$1] @}' inventory-shipped BBS-list -@end example - -@noindent -In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]} -contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains -@code{"BBS-list"}. @code{ARGC} is 3, one more than the index of the -last element in @code{ARGV} since the elements are numbered from zero. - -Notice that the @code{awk} program is not treated as an argument. The -@samp{-f} @file{@var{filename}} option, and the @samp{-F} option, -are also not treated as arguments for this purpose. 
- -Variable assignments on the command line @emph{are} treated as arguments, -and do show up in the @code{ARGV} array. - -Your program can alter @code{ARGC} and the elements of @code{ARGV}. Each -time @code{awk} reaches the end of an input file, it uses the next -element of @code{ARGV} as the name of the next input file. By storing a -different string there, your program can change which files are read. -You can use @samp{-} to represent the standard input. By storing -additional elements and incrementing @code{ARGC} you can cause -additional files to be read. - -If you decrease the value of @code{ARGC}, that eliminates input files -from the end of the list. By recording the old value of @code{ARGC} -elsewhere, your program can treat the eliminated arguments as -something other than file names. - -To eliminate a file from the middle of the list, store the null string -(@code{""}) into @code{ARGV} in place of the file's name. As a -special feature, @code{awk} ignores file names that have been -replaced with the null string. - -@item ENVIRON -@vindex ENVIRON -This is an array that contains the values of the environment. The array -indices are the environment variable names; the values are the values of -the particular environment variables. For example, -@code{ENVIRON["HOME"]} might be @file{/u/close}. Changing this array -does not affect the environment passed on to any programs that -@code{awk} may spawn via redirection or the @code{system} function. -(This may not work under operating systems other than MS-DOS, Unix, or -GNU.) - -@item FILENAME -@c @vindex FILENAME -This is the name of the file that @code{awk} is currently reading. -If @code{awk} is reading from the standard input (in other words, -there are no files listed on the command line), -@code{FILENAME} is set to @code{"-"}. -@code{FILENAME} is changed each time a new file is read (@pxref{Reading -Files}).@refill - -@item FNR -@c @vindex FNR -@code{FNR} is the current record number in the current file. 
@code{FNR} is -incremented each time a new record is read (@pxref{Getline}). -It is reinitialized to 0 each time a new input file is started. - -@item NF -@c @vindex NF -@code{NF} is the number of fields in the current input record. -@code{NF} is set each time a new record is read, when a new field is -created, or when @code{$0} changes (@pxref{Fields}).@refill - -@item NR -@c @vindex NR -This is the number of input records @code{awk} has processed since -the beginning of the program's execution (@pxref{Records}). -@code{NR} is set each time a new record is read.@refill - -@item RLENGTH -@c @vindex RLENGTH -@code{RLENGTH} is the length of the string matched by the @code{match} -function (@pxref{String Functions}). @code{RLENGTH} is set by -invoking the @code{match} function. Its value is the length of the -matched string, or -1 if no match was found.@refill - -@item RSTART -@c @vindex RSTART -@code{RSTART} is the start of the string matched by the @code{match} -function (@pxref{String Functions}). @code{RSTART} is set by invoking -the @code{match} function. Its value is the position in the string where -the matched string starts, or 0 if no match was found.@refill -@end table - -@node Sample Program, Notes, Special , Top -@appendix Sample Program - -The following example is a complete @code{awk} program, which prints -the number of occurrences of each word in its input. It illustrates the -associative nature of @code{awk} arrays by using strings as subscripts. It -also demonstrates the @code{for @var{x} in @var{array}} construction. -Finally, it shows how @code{awk} can be used in conjunction with other -utility programs to do a useful task of some complexity with a minimum of -effort. 
Some explanations follow the program listing.@refill - -@example -awk ' -# Print list of word frequencies -@{ - for (i = 1; i <= NF; i++) - freq[$i]++ -@} - -END @{ - for (word in freq) - printf "%s\t%d\n", word, freq[word] -@}' -@end example - -The first thing to notice about this program is that it has two rules. The -first rule, because it has an empty pattern, is executed on every line of -the input. It uses @code{awk}'s field--accessing mechanism (@pxref{Fields}) -to pick out the individual words from the line, and the special variable -@code{NF} (@pxref{Special}) to know how many fields are available. - -For each input word, an element of the array @code{freq} is incremented to -reflect that the word has been seen an additional time.@refill - -The second rule, because it has the pattern @code{END}, is not executed -until the input has been exhausted. It prints out the contents of the -@code{freq} table that has been built up inside the first action.@refill - -Note that this program has several problems that would prevent it from being -useful by itself on real text files:@refill - -@itemize @bullet -@item -Words are detected using the @code{awk} convention that fields are -separated by whitespace and that other characters in the input (except -newlines) don't have any special meaning to @code{awk}. This means that -punctuation characters count as part of words.@refill - -@item -The @code{awk} language considers upper and lower case characters to be -distinct. Therefore, @samp{foo} and @samp{Foo} will not be treated by this -program as the same word. This is undesirable since in normal text, words -are capitalized if they begin sentences, and a frequency analyzer should not -be sensitive to that.@refill - -@item -The output does not come out in any useful order. 
You're more likely to be -interested in which words occur most frequently, or having an alphabetized -table of how frequently each word occurs.@refill -@end itemize - -The way to solve these problems is to use other operating system utilities -to process the input and output of the @code{awk} script. Suppose the -script shown above is saved in the file @file{frequency.awk}. Then the -shell command:@refill - -@example -tr A-Z a-z < file1 | tr -cd 'a-z\012' \ - | awk -f frequency.awk \ - | sort +1 -nr -@end example - -@noindent -produces a table of the words appearing in @file{file1} in order of -decreasing frequency. - -The first @code{tr} command in this pipeline translates all the upper case -characters in @file{file1} to lower case. The second @code{tr} command -deletes all the characters in the input except lower case characters and -newlines. The second argument to the second @code{tr} is quoted to protect -the backslash in it from being interpreted by the shell. The @code{awk} -program reads this suitably massaged data and produces a word frequency -table, which is not ordered. - -The @code{awk} script's output is now sorted by the @code{sort} command and -printed on the terminal. The options given to @code{sort} in this example -specify to sort by the second field of each input line (skipping one field), -that the sort keys should be treated as numeric quantities (otherwise -@samp{15} would come before @samp{5}), and that the sorting should be done -in descending (reverse) order.@refill - -See the general operating system documentation for more information on how -to use the @code{tr} and @code{sort} commands.@refill - -@ignore -@strong{I have some more substantial programs courtesy of Rick Adams -at UUNET. 
I am planning on incorporating those either in addition to or -instead of this program.} -@end ignore - -@node Notes, Glossary, Sample Program, Top -@appendix Implementation Notes - -This appendix contains information mainly of interest to implementors and -maintainers of @code{gawk}. Everything in it applies specifically to -@code{gawk}, and not to other implementations. - -@menu -* Extensions:: Things @code{gawk} does that Unix @code{awk} does not. - -* Future Extensions:: Things likely to appear in a future release. - -* Improvements:: Suggestions for future improvements. - -* Manual Improvements:: Suggestions for improvements to this manual. -@end menu - -@node Extensions, Future Extensions, , Notes -@appendixsec GNU Extensions to the AWK Language - -Several new features are in a state of flux. They are described here -merely to document them somewhat, but they will probably change. We hope -they will be incorporated into other versions of @code{awk}, too. - -All of these features can be turned off either by compiling @code{gawk} -with @samp{-DSTRICT}, or by invoking @code{gawk} as @samp{awk}. - -@table @asis -@item The @code{AWKPATH} environment variable -When opening a file supplied via the @samp{-f} option, if the filename does -not contain a @samp{/}, @code{gawk} will perform a @dfn{path search} -for the file, similar to that performed by the shell. @code{gawk} gets -its search path from the @code{AWKPATH} environment variable. If that -variable does not exist, it uses the default path -@code{".:/usr/lib/awk:/usr/local/lib/awk"}.@refill - -@item Case Independent Matching -Two new operators have been introduced, @code{~~}, and @code{!~~}. -These perform regular expression match and no-match operations that are -case independent. In other words, @samp{A} and @samp{a} would both -match @samp{/a/}. - -@item The @samp{-i} option -This option causes the @code{~} and @code{!~} operators to behave -like the @code{~~} and @code{!~~} operators described above. 
- -@item The @samp{-v} option -This option prints version information for this particular copy of @code{gawk}. -This is so you can determine if your copy of @code{gawk} is up to date -with respect to whatever the Free Software Foundation is currently -distributing. It may disappear in a future version of @code{gawk}. -@end table - -@node Future Extensions, Improvements, Extensions, Notes -@appendixsec Extensions Likely To Appear In A Future Release - -Here are some more extensions that indicate the directions we are -currently considering for @code{gawk}. Like the previous section, this -section is also subject to change. None of these are implemented yet. - -@table @asis -@item The @code{IGNORECASE} special variable -If @code{IGNORECASE} is non--zero, then @emph{all} regular expression matching -will be done in a case--independent fashion. The @samp{-i} option and the -@code{~~} and @code{!~~} operators will go away, as this mechanism -generalizes those facilities. - -@item More Escape Sequences -The ANSI C @samp{\a}, and @samp{\x} escape sequences will be recognized. -Unix @code{awk} does not recognize @samp{\v}, although @code{gawk} does. - -@item @code{RS} as a regexp -The meaning of @code{RS} will be generalized along the lines of @code{FS}. - -@item Transliteration Functions -We are planning on adding @code{toupper} and @code{tolower} functions which -will take string arguments, and return strings where the case of each letter -has been transformed to upper-- or lower--case respectively. - -@item Access To System File Descriptors -@code{gawk} will recognize the special file names @file{/dev/stdin}, -@file{/dev/stdout}, @file{/dev/stderr}, and @file{/dev/fd/@var{N}} internally. -These will allow access to inherited file descriptors from within an -@code{awk} program.@refill - -@c this is @emph{very} long term --- not worth including right now. 
-@ignore -@item The C Comma Operator -We may add the C comma operator, which takes the form -@var{expr1}@code{,}@var{expr2}. The first expression is evaluated, and the -result is thrown away. The value of the full expression is the value of -@var{expr2}.@refill -@end ignore -@end table - -@node Improvements, Manual Improvements, Future Extensions, Notes -@appendixsec Suggestions for Future Improvements - -Here are some projects that would--be @code{gawk} hackers might like to take -on. They vary in size from a few days to a few weeks of programming, -depending on which one you choose and how fast a programmer you are. Please -send any improvements you write to the maintainers at the GNU -project.@refill - -@enumerate -@item -State machine regexp matcher: At present, @code{gawk} uses the backtracking -regular expression matcher from the GNU subroutine library. If a regexp is -really going to be used a lot of times, it is faster to convert it once to a -description of a finite state machine, then run a routine simulating that -machine every time you want to match the regexp. You could use -the matching routines used by GNU @code{egrep}. - -@item -Compilation of @code{awk} programs: @code{gawk} uses a @code{Bison} -(YACC--like) parser to convert the script given it into a syntax tree; -the syntax tree is then executed by a simple recursive evaluator.
-Both of these steps incur a lot of overhead, since parsing can be slow -(especially if you also do the previous project and convert regular -expressions to finite state machines at compile time) and the -recursive evaluator performs many procedure calls to do even the -simplest things.@refill - -It should be possible for @code{gawk} to convert the script's parse tree -into a C program which the user would then compile, using the normal -C compiler and a special @code{gawk} library to provide all the needed -functions (regexps, fields, associative arrays, type coercion, and so -on).@refill - -An easier possibility might be for an intermediate phase of @code{awk} to -convert the parse tree into a linear byte code form like the one used -in GNU Emacs Lisp. The recursive evaluator would then be replaced by -a straight line byte code interpreter that would be intermediate in speed -between running a compiled program and doing what @code{gawk} does -now.@refill -@end enumerate - -@node Manual Improvements, , Improvements, Notes -@appendixsec Suggestions For Future Improvements of This Manual - -@enumerate -@item -An error message section has not been included in this version of the -manual. Perhaps some nice beta testers will document some of the messages -for the future. - -@item -A summary page has not been included, as the ``man'', or help, page that -comes with the @code{gawk} code should suffice. - -GNU only supports Info, so this manual itself should contain whatever -forms of information it would be useful to have on an Info summary page. - -@item -A function and variable index has not been included as we are not sure what to -put in it. -@c @strong{ADR: I think I can tackle this.} - -@item -A section summarizing the differences between V7 @code{awk} and -System V Release 4 @code{awk} would be useful for long--time @code{awk} -hackers. 
-@end enumerate - -@node Glossary, Index , Notes, Top -@appendix Glossary - -@c @strong{Add a cross-reference to most of these entries.} - -@table @asis -@item Action -A series of @code{awk} statements attached to a rule. If the rule's -pattern matches an input record, the @code{awk} language executes the -rule's action. Actions are always enclosed in curly braces.@refill - -@item Amazing @code{awk} assembler -Henry Spencer at the University of Toronto wrote a retargetable assembler -completely as @code{awk} scripts. It is thousands of lines long, including -machine descriptions for several 8--bit microcomputers. It is distributed -with @code{gawk} and is a good example of a program that would have been -better written in another language.@refill - -@item Assignment -An @code{awk} expression that changes the value of some @code{awk} -variable or data object. An object that you can assign to is called an -@dfn{lvalue}.@refill - -@item Built-in function -The @code{awk} language provides built--in functions that perform various -numerical and string computations. Examples are @code{sqrt} (for the -square root of a number) and @code{substr} (for a substring of a -string).@refill - -@item C -The system programming language that most of GNU is written in. The -@code{awk} programming language has C--like syntax, and this manual -points out similarities between @code{awk} and C when appropriate.@refill - -@item Compound statement -A series of @code{awk} statements, enclosed in curly braces. Compound -statements may be nested.@refill - -@item Concatenation -Concatenating two strings means sticking them together, one after another, -giving a new string. For example, the string @samp{foo} concatenated with -the string @samp{bar} gives the string @samp{foobar}.@refill - -@item Conditional expression -A relation that is either true or false, such as @code{(a < b)}. 
-Conditional expressions are used in @code{if} and @code{while} statements, -and in patterns to select which input records to process.@refill - -@item Curly braces -The characters @samp{@{} and @samp{@}}. Curly braces are used in -@code{awk} for delimiting actions, compound statements, and function -bodies.@refill - -@item Data objects -These are numbers and strings of characters. Numbers are converted into -strings and vice versa, as needed.@refill - -@item Escape Sequences -A special sequence of characters used for describing non--printable -characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII -ESC (escape) character. - -@item Field -When @code{awk} reads an input record, it splits the record into pieces -separated by whitespace (or by a separator regexp which you can -change by setting the special variable @code{FS}). Such pieces are -called fields.@refill - -@item Format -Format strings are used to control the appearance of output in the -@code{printf} statement. Also, data conversions from numbers to strings -are controlled by the format string contained in the special variable -@code{OFMT}.@refill - -@item Function -A specialized group of statements often used to encapsulate general -or program--specific tasks. @code{awk} has a number of built--in -functions, and also allows you to define your own. - -@item @code{gawk} -The GNU implementation of @code{awk}. - -@item @code{awk} language -The language in which @code{awk} programs are written. - -@item @code{awk} program -An @code{awk} program consists of a series of @dfn{patterns} and -@dfn{actions}, collectively known as @dfn{rules}. For each input record -given to the program, the program's rules are all processed in turn. -@code{awk} programs may also contain function definitions.@refill - -@item @code{awk} script -Another name for an @code{awk} program. - -@item Input record -A single chunk of data read in by @code{awk}. 
Usually, an @code{awk} input -record consists of one line of text.@refill - -@item Keyword -In the @code{awk} language, a keyword is a word that has special -meaning. Keywords are reserved and may not be used as variable names. - -The keywords are: -@code{if}, -@code{else}, -@code{while}, -@code{do@dots{}while}, -@code{for}, -@code{for@dots{}in}, -@code{break}, -@code{continue}, -@code{delete}, -@code{next}, -@code{function}, -@code{func}, -and @code{exit}.@refill - -@item Lvalue -An expression that can appear on the left side of an assignment -operator. In most languages, lvalues can be variables or array -elements. In @code{awk}, a field designator can also be used as an -lvalue.@refill - -@item Number -A numeric valued data object. The @code{gawk} implementation uses double -precision floating point to represent numbers.@refill - -@item Pattern -Patterns tell @code{awk} which input records are interesting to which -rules. - -A pattern is an arbitrary conditional expression against which input is -tested. If the condition is satisfied, the pattern is said to @dfn{match} -the input record. A typical pattern might compare the input record against -a regular expression.@refill - -@item Range (of input lines) -A sequence of consecutive lines from the input file. A pattern -can specify ranges of input lines for @code{awk} to process, or it can -specify single lines.@refill - -@item Recursion -When a function calls itself, either directly or indirectly. -If this isn't clear, refer to the entry for ``recursion''. - -@item Redirection -Redirection means performing input from other than the standard input -stream, or output to other than the standard output stream. - -You can redirect the output of the @code{print} and @code{printf} statements -to a file or a system command, using the @code{>}, @code{>>}, and @code{|} -operators. 
You can redirect input to the @code{getline} statement using -the @code{<} and @code{|} operators.@refill - -@item Regular Expression -See ``regexp''. - -@item Regexp -Short for @dfn{regular expression}. A regexp is a pattern that denotes a -set of strings, possibly an infinite set. For example, the regexp -@samp{R.*xp} matches any string starting with the letter @samp{R} -and ending with the letters @samp{xp}. In @code{awk}, regexps are -used in patterns and in conditional expressions.@refill - -@item Rule -A segment of an @code{awk} program, that specifies how to process single -input records. A rule consists of a @dfn{pattern} and an @dfn{action}. -@code{awk} reads an input record; then, for each rule, if the input record -satisfies the rule's pattern, @code{awk} executes the rule's action. -Otherwise, the rule does nothing for that input record.@refill - -@item Special Variable -The variables @code{ARGC}, @code{ARGV}, @code{ENVIRON}, @code{FILENAME}, -@code{FNR}, @code{FS}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, -@code{ORS}, @code{RLENGTH}, @code{RSTART}, @code{RS}, @code{SUBSEP}, have -special meaning to @code{awk}. Changing some of them affects @code{awk}'s -running environment.@refill - -@item Stream Editor -A program that reads records from an input stream and processes them one -or more at a time. This is in contrast with batch programs, which may -expect to read their input files in entirety before starting to do -anything, and with interactive programs, which require input from the -user.@refill - -@item String -A datum consisting of a sequence of characters, such as @samp{I am a -string}. Constant strings are written with double--quotes in the -@code{awk} language, and may contain @dfn{escape sequences}. 
- -@item Whitespace -A sequence of blank or tab characters occurring inside an input record or a -string.@refill -@end table - -@node Index, , Glossary, Top -@unnumbered Index -@printindex cp - -@summarycontents -@contents -@bye diff --git a/gawk.toc b/gawk.toc deleted file mode 100644 index b07c6d3b..00000000 --- a/gawk.toc +++ /dev/null @@ -1,104 +0,0 @@ -\unnumbchapentry {Preface}{1} -\unnumbsecentry{History of {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing gawk}\hbox {}}{1} -\unnumbchapentry {GNU GENERAL PUBLIC LICENSE}{3} -\unnumbsecentry{Preamble}{3} -\unnumbsecentry{TERMS AND CONDITIONS}{4} -\unnumbsecentry{Appendix: How to Apply These Terms to Your New Programs}{7} -\chapentry {Using This Manual}{1}{9} -\secentry {Input Files for the Examples}{1}{1}{9} -\chapentry {Getting Started With {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{2}{11} -\secentry {A Very Simple Example}{2}{1}{11} -\secentry {An Example with Two Rules}{2}{2}{12} -\secentry {A More Complex Example}{2}{3}{13} -\secentry {How to Run {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{14} -\subsecentry {One--shot Throw--away {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{1}{15} -\subsecentry {Running {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} without Input Files}{2}{4}{2}{15} -\subsecentry {Running Long Programs}{2}{4}{3}{16} -\subsecentry {Executable {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{4}{4}{17} -\subsecentry {Details of the {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Command Line}{2}{4}{5}{18} -\secentry {Comments in {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Programs}{2}{5}{19} -\secentry {{\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {} Statements versus Lines}{2}{6}{20} -\secentry {When to Use {\fam \ttfam \tentt \rawbackslash \frenchspacing 
awk}\hbox {}}{2}{7}{21} -\chapentry {Reading Files (Input)}{3}{23} -\secentry {How Input is Split into Records}{3}{1}{23} -\secentry {Examining Fields}{3}{2}{24} -\secentry {Non-constant Field Numbers}{3}{3}{26} -\secentry {Changing the Contents of a Field}{3}{4}{27} -\secentry {Specifying How Fields Are Separated}{3}{5}{28} -\secentry {Multiple--Line Records}{3}{6}{31} -\secentry {Assigning Variables on the Command Line}{3}{7}{32} -\secentry {Explicit Input with {\fam \ttfam \tentt \rawbackslash \frenchspacing getline}\hbox {}}{3}{8}{32} -\subsecentry {Closing Input Files}{3}{8}{1}{36} -\chapentry {Printing Output}{4}{39} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} Statement}{4}{1}{39} -\secentry {Examples of {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} Statements}{4}{2}{40} -\secentry {Output Separators}{4}{3}{41} -\secentry {Redirecting Output of {\fam \ttfam \tentt \rawbackslash \frenchspacing print}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}}{4}{4}{42} -\subsecentry {Closing Output Files and Pipes}{4}{4}{1}{43} -\secentry {Using {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Statements For Fancier Printing}{4}{5}{44} -\subsecentry {Introduction to the {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Statement}{4}{5}{1}{45} -\subsecentry {Format--Control Characters}{4}{5}{2}{45} -\subsecentry {Modifiers for {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {} Formats}{4}{5}{3}{46} -\subsecentry {Examples of Using {\fam \ttfam \tentt \rawbackslash \frenchspacing printf}\hbox {}}{4}{5}{4}{46} -\chapentry {Useful ``One-liners''}{5}{49} -\chapentry {Patterns}{6}{51} -\secentry {The Empty Pattern}{6}{1}{51} -\secentry {Regular Expressions as Patterns}{6}{2}{52} -\subsecentry {How to use Regular Expressions}{6}{2}{1}{52} -\subsecentry {Regular Expression Operators}{6}{2}{2}{53} -\secentry {Comparison Expressions as Patterns}{6}{3}{55} 
-\secentry {Specifying Record Ranges With Patterns}{6}{4}{56} -\secentry {{\fam \ttfam \tentt \rawbackslash \frenchspacing BEGIN}\hbox {} and {\fam \ttfam \tentt \rawbackslash \frenchspacing END}\hbox {} Special Patterns}{6}{5}{57} -\secentry {Boolean Operators and Patterns}{6}{6}{58} -\secentry {Conditional Patterns}{6}{7}{59} -\chapentry {Actions: The Basics}{7}{61} -\chapentry {Actions: Expressions}{8}{63} -\secentry {Constant Expressions}{8}{1}{63} -\secentry {Variables}{8}{2}{64} -\secentry {Arithmetic Operators}{8}{3}{65} -\secentry {String Concatenation}{8}{4}{65} -\secentry {Comparison Expressions}{8}{5}{66} -\secentry {Boolean Operators}{8}{6}{67} -\secentry {Assignment Operators}{8}{7}{68} -\secentry {Increment Operators}{8}{8}{70} -\secentry {Conversion of Strings and Numbers}{8}{9}{71} -\secentry {Conditional Expressions}{8}{10}{72} -\secentry {Function Calls}{8}{11}{73} -\chapentry {Actions: Statements}{9}{75} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing if}\hbox {} Statement}{9}{1}{75} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} Statement}{9}{2}{76} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing do}\hbox {}--{\fam \ttfam \tentt \rawbackslash \frenchspacing while}\hbox {} Statement}{9}{3}{77} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing for}\hbox {} Statement}{9}{4}{77} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing break}\hbox {} Statement}{9}{5}{79} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing continue}\hbox {} Statement}{9}{6}{80} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing next}\hbox {} Statement}{9}{7}{81} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing exit}\hbox {} Statement}{9}{8}{82} -\chapentry {Actions: Using Arrays in {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{10}{83} -\secentry {Introduction to Arrays}{10}{1}{83} -\secentry {Referring to an Array 
Element}{10}{2}{85} -\secentry {Assigning Array Elements}{10}{3}{86} -\secentry {Basic Example of an Array}{10}{4}{86} -\secentry {Scanning All Elements of an Array}{10}{5}{87} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing delete}\hbox {} Statement}{10}{6}{88} -\secentry {Multi--dimensional arrays}{10}{7}{89} -\secentry {Scanning Multi--dimensional Arrays}{10}{8}{91} -\chapentry {Built--in functions}{11}{93} -\secentry {Numeric Built--in Functions}{11}{1}{93} -\secentry {Built--in Functions for String Manipulation}{11}{2}{95} -\secentry {Built--in Functions for I/O to Files and Commands}{11}{3}{98} -\chapentry {User--defined Functions}{12}{99} -\secentry {Syntax of Function Definitions}{12}{1}{99} -\secentry {Function Definition Example}{12}{2}{100} -\secentry {Caveats of Function Calling}{12}{3}{101} -\secentry {The {\fam \ttfam \tentt \rawbackslash \frenchspacing return}\hbox {} statement}{12}{4}{102} -\chapentry {Special Variables}{13}{105} -\secentry {Special Variables That Control {\fam \ttfam \tentt \rawbackslash \frenchspacing awk}\hbox {}}{13}{1}{105} -\secentry {Special Variables That Convey Information to You}{13}{2}{106} -\chapentry {Sample Program}{Appendix \char 65}{109} -\chapentry {Implementation Notes}{Appendix \char 66}{111} -\secentry {GNU Extensions to the AWK Language}{\char 66}{1}{111} -\secentry {Extensions Likely To Appear In A Future Release}{\char 66}{2}{111} -\secentry {Suggestions for Future Improvements}{\char 66}{3}{112} -\secentry {Suggestions For Future Improvements of This Manual}{\char 66}{4}{113} -\chapentry {Glossary}{Appendix \char 67}{115} -\unnumbchapentry {Index}{119} diff --git a/gawk.tp b/gawk.tp deleted file mode 100644 index e69de29b..00000000 --- a/gawk.tp +++ /dev/null diff --git a/gawk.tps b/gawk.tps deleted file mode 100644 index e69de29b..00000000 --- a/gawk.tps +++ /dev/null diff --git a/gawk.vr b/gawk.vr deleted file mode 100644 index 9b2ba722..00000000 --- a/gawk.vr +++ /dev/null @@ -1,17 +0,0 @@ 
-\entry {ARGV}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ARGV}\hbox {}} -\entry {OFS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}} -\entry {ORS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}} -\entry {RS}{19}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}} -\entry {FILENAME}{23}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FILENAME}\hbox {}} -\entry {RS}{23}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}} -\entry {NR}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}} -\entry {FNR}{24}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}} -\entry {NF}{25}{{\fam \ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}} -\entry {FS}{28}{{\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}} -\entry {OFS}{41}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}} -\entry {ORS}{41}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}} -\entry {OFMT}{71}{{\fam \ttfam \tentt \rawbackslash \frenchspacing OFMT}\hbox {}} -\entry {SUBSEP}{89}{{\fam \ttfam \tentt \rawbackslash \frenchspacing SUBSEP}\hbox {}} -\entry {RSTART}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RSTART}\hbox {}} -\entry {RLENGTH}{95}{{\fam \ttfam \tentt \rawbackslash \frenchspacing RLENGTH}\hbox {}} -\entry {ENVIRON}{106}{{\fam \ttfam \tentt \rawbackslash \frenchspacing ENVIRON}\hbox {}} diff --git a/gawk.vrs b/gawk.vrs deleted file mode 100644 index 0ee09c6d..00000000 --- a/gawk.vrs +++ /dev/null @@ -1,21 +0,0 @@ -\initial {A} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ARGV}\hbox {}}{19} -\initial {E} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ENVIRON}\hbox {}}{106} -\initial {F} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FILENAME}\hbox {}}{23} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FNR}\hbox {}}{24} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing FS}\hbox {}}{28} -\initial {N} -\entry {{\fam 
\ttfam \tentt \rawbackslash \frenchspacing NF}\hbox {}}{25} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing NR}\hbox {}}{24} -\initial {O} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing OFMT}\hbox {}}{71} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing OFS}\hbox {}}{19, 41} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing ORS}\hbox {}}{19, 41} -\initial {R} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RLENGTH}\hbox {}}{95} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RS}\hbox {}}{19, 23} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing RSTART}\hbox {}}{95} -\initial {S} -\entry {{\fam \ttfam \tentt \rawbackslash \frenchspacing SUBSEP}\hbox {}}{89} diff --git a/gnu.getopt.c b/gnu.getopt.c deleted file mode 100644 index 93002de9..00000000 --- a/gnu.getopt.c +++ /dev/null @@ -1,417 +0,0 @@ -/* Getopt for GNU. - Copyright (C) 1987, 1989 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - - - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of `argv' so that, - when it is done, all the options precede everything else. 
Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable _POSIX_OPTION_ORDER disables permutation. - Then the behavior is completely standard. - - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include <stdio.h> - -#ifdef sparc -#include <alloca.h> -#endif -#if defined(USG) || defined(MSDOS) -extern char *alloca(); -extern char *strchr(); -#define index strchr -#define bcopy(s, d, l) memcpy((d), (s), (l)) -#endif - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg = 0; - -/* Index in ARGV of the next element to be scanned. - This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns EOF, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -int optind = 0; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Describe how to deal with options that follow non-option ARGV-elements. 
- - If the caller does not request a particular mode, - the default is REQUIRE_ORDER if the environment variable - _POSIX_OPTION_ORDER is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options. - Stop option processing when the first non-option is seen. - This is what Unix does. - - PERMUTE is the default. We permute the contents of `argv' as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code zero. - Using `-' as the first character of the list of option characters - requests this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return EOF with `optind' != ARGC. */ - -static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering; - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved.
*/ - -static void -exchange (argv) - char **argv; -{ - int nonopts_size - = (last_nonopt - first_nonopt) * sizeof (char *); - char **temp = (char **) alloca (nonopts_size); - - /* Interchange the two blocks of data in argv. */ - - bcopy (&argv[first_nonopt], temp, nonopts_size); - bcopy (&argv[last_nonopt], &argv[first_nonopt], - (optind - last_nonopt) * sizeof (char *)); - bcopy (temp, &argv[first_nonopt + optind - last_nonopt], - nonopts_size); - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns `EOF'. - Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - A colon in OPTSTRING means that the previous character is an option - that wants an argument. The argument is taken from the rest of the - current ARGV-element, or from the following ARGV-element, - and returned in `optarg'. - - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'.
- - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg'. - - If OPTSTRING starts with `-', it requests a different method of handling the - non-option ARGV-elements. See the comments about RETURN_IN_ORDER, above. */ - -int -getopt (argc, argv, optstring) - int argc; - char **argv; - char *optstring; -{ - /* Initialize the internal data when the first call is made. - Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - if (optind == 0) - { - first_nonopt = last_nonopt = optind = 1; - - nextchar = 0; - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - ordering = RETURN_IN_ORDER; - else if (getenv ("_POSIX_OPTION_ORDER") != 0) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - } - - if (nextchar == 0 || *nextchar == 0) - { - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange (argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Now skip any additional non-options - and extend the range of non-options previously skipped. */ - - while (optind < argc - && (argv[optind][0] != '-' - || argv[optind][1] == 0)) - optind++; - last_nonopt = optind; - } - - /* Special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option.
*/ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange (argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return EOF; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ - - if (argv[optind][0] != '-' || argv[optind][1] == 0) - { - if (ordering == REQUIRE_ORDER) - return EOF; - optarg = argv[optind++]; - return 0; - } - - /* We have found another option-ARGV-element. - Start decoding its characters. */ - - nextchar = argv[optind] + 1; - } - - /* Look at and handle the next option-character. */ - - { - char c = *nextchar++; - char *temp = (char *) index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == 0) - optind++; - - if (temp == 0 || c == ':') - { - if (opterr != 0) - { - if (c < 040 || c >= 0177) - fprintf (stderr, "%s: unrecognized option, character code 0%o\n", - argv[0], c); - else - fprintf (stderr, "%s: unrecognized option `-%c'\n", - argv[0], c); - } - return '?'; - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != 0) - { - optarg = nextchar; - optind++; - } - else - optarg = 0; - nextchar = 0; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != 0) - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. 
*/ - optind++; - } - else if (optind == argc) - { - if (opterr != 0) - fprintf (stderr, "%s: no argument for `-%c' option\n", - argv[0], c); - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - nextchar = 0; - } - } - return c; - } -} - -#ifdef TEST - -/* Compile with -DTEST to make an executable for use in testing - the above definition of `getopt'. */ - -int -main (argc, argv) - int argc; - char **argv; -{ - char c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind; - if ((c = getopt (argc, argv, "abc:d:0123456789")) == EOF) - break; - - switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - return 0; -} - -#endif /* TEST */ - diff --git a/makefile.pc b/makefile.pc deleted file mode 100644 index b812dad4..00000000 --- a/makefile.pc +++ /dev/null @@ -1,169 +0,0 @@ -# Makefile for GNU Awk (for use with Microsoft C V5.1) -# -# Rewritten by Arnold Robbins, September 1988, March 1989. -# -# Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Progamming Language. 
-# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 1, or (at your option) -# any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GAWK; see the file COPYING. If not, write to -# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - -# CFLAGS: options to the C compiler -# -# -Ox optimize -# -Zi include debugging info (include /CO in LINKFLAFS) -# -AL compile in large model -# -AS compile in small model -# -# -DNOVPRINTF - system has no vprintf and associated routines -# -DHASDOPRNT - system needs version of vprintf et al. defined in awk5.c -# and has a BSD compatable doprnt() -# -DNOMEMCMP - system lacks memcmp() -# -DUSG - system is generic-ish System V. 
-# -CC=cl -LINKFLAGS= /E /FAR /PAC /NOE /NOI /st:0x1800 -#LINKFLAGS= /CO /NOE /NOI /st:0x1800 -OPTIMIZE= -AL -Ox -DEBUG=#-DDEBUG #-DFUNC_TRACE -DMEMDEBUG -DEBUGGER= #-Zi -PROFILE=#-pg -SYSV= -BSD=#-DHASDOPRNT -MEMCMP=#-DNOMEMCMP -VPRINTF=#-DNOVPRINTF - -FLAGS= $(OPTIMIZE) $(SYSV) $(DEBUG) $(BSD) $(MEMCMP) $(VPRINTF) -CFLAGS= $(FLAGS) $(DEBUGGER) $(PROFILE) -LDFLAGS= #-Bstatic - -SRC = awk1.c awk2.c awk3.c awk4.c awk5.c \ - awk6.c awk7.c awk8.c awk9.c regex.c version.c do_free.c awka.c - -PCSTUFF= makefile.pc names.lnk random.c - -AWKOBJS = awk1.obj awk2.obj awk3.obj awk4.obj awk5.obj awk6.obj awk7.obj \ - awk8.obj awk9.obj version.obj awka.obj # do_free.obj # used for MEMDEBUG -ALLOBJS = $(AWKOBJS) awk_tab.obj - -# Parser to use on grammar -- if you don't have bison use the first one -PARSER = yacc -PARSOUT=ytab.c -#PARSER = bison -y -#PARSOUT=y_tab.c -#PARSFLAGS=-v - -# S5OBJS -# Set equal to alloca.o if your system is S5 and you don't have -# alloca. Uncomment the rule below to actually make alloca.o. -S5OBJS= - -# GETOPT -# Set equal to getopt.o if you have a generic BSD system. The -# generic BSD getopt is reported to not work with gawk. The -# gnu getopt is supplied in gnu.getopt.c. The Public Domain -# getopt from AT&T is in att.getopt.c. Choose one of these, -# and rename it getopt.c. -GETOPT=getopt.obj - -# LIBOBJS -# Stuff that awk uses as library routines, but not in /lib/libc.a. -LIBOBJS= regex.obj random.obj $(S5OBJS) $(GETOPT) - -UPDATES = Makefile awk.h awk.y \ - $(SRC) regex.c regex.h - -INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \ - gawk-info-5 gawk.aux gawk.cp gawk.cps gawk.dvi gawk.fn gawk.fns \ - gawk.ky gawk.kys gawk.pg gawk.pgs gawk.texinfo gawk.toc \ - gawk.tp gawk.tps gawk.vr gawk.vrs - -# DOCS -# Documentation for users -# -DOCS=gawk.1 $(INFOFILES) - -# We don't distribute shar files, but they're useful for mailing. 
-SHARS = $(DOCS) COPYING README PROBLEMS $(UPDATES) awk.tab.c \ - alloca.s alloca.c att_getopt.c gnu_getopt.c $(PCSTUFF) - -gawk: $(ALLOBJS) $(LIBOBJS) - link @names.lnk,gawk.exe $(LINKFLAGS); - -# this kludge necessary because MSC 5.1 compiler bombs with -Oa or -Ol (where -# -Ox == "-Oailt -Gs") -regex.obj: regex.c - $(CC) -c -Oit -AL regex.c - -$(AWKOBJS): awk.h - -awk_tab.obj: awk.h awk_tab.c - -awk_tab.c: awk.y - @-del awk_tab.c - $(PARSER) $(PARSFLAGS) awk.y - -rename $(PARSOUT) awk_tab.c - -# Alloca: uncomment this if your system (notably System V boxen) -# does not have alloca in /lib/libc.a -# -#alloca.o: alloca.s -# /lib/cpp < alloca.s | sed '/^#/d' > t.s -# as t.s -o alloca.o -# rm t.s - -# If your machine is not supported by the assembly version of alloca.s, -# use the C version instead. This uses the default rules to make alloca.o. -# -#alloca.o: alloca.c - -lint: $(SRC) - lint -hcbax $(FLAGS) $(SRC) awk_tab.c - -clean: - rm -f gawk *.obj core awk.output gmon.out make.out #awk_tab.c - -awk.shar: $(SHARS) - shar -f awk -c $(SHARS) - -awk.tar: $(SHARS) - tar cvf awk.tar $(SHARS) - -updates.tar: $(UPDATES) - tar cvf gawk.tar $(UPDATES) - -awk.tar.Z: awk.tar - compress < awk.tar > awk.tar.Z - -doc: $(DOCS) - nroff -man $(DOCS) | col > $(DOCS).out - -# This command probably won't be useful to the rest of the world, but makes -# life much easier for me. -dist: awk.tar awk.tar.Z - -diff: - for i in RCS/*; do rcsdiff -c -b $$i > `basename $$i ,v`.diff; done - -update: $(UPDATES) - sendup $? - touch update - -release: $(SHARS) - -rm -fr gawk-dist - mkdir gawk-dist - cp $(SHARS) gawk-dist - tar -cvf - gawk-dist | compress > dist.tar.Z |