From 8c042f99cc7465c86351d21331a129111b75345d Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Fri, 16 Jul 2010 12:41:09 +0300 Subject: Move to gawk-3.0.0. --- field.c | 390 ++++++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 243 insertions(+), 147 deletions(-) (limited to 'field.c') diff --git a/field.c b/field.c index fa7dc3e9..4b638b2c 100644 --- a/field.c +++ b/field.c @@ -6,7 +6,7 @@ * Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the - * AWK Progamming Language. + * AWK Programming Language. * * GAWK is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,44 +19,48 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GAWK; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include "awk.h" -typedef void (* Setfunc) P((int, char*, int, NODE *)); +typedef void (* Setfunc) P((long, char *, long, NODE *)); -static long (*parse_field) P((int, char **, int, NODE *, +static long (*parse_field) P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); static void rebuild_record P((void)); -static long re_parse_field P((int, char **, int, NODE *, +static long re_parse_field P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); -static long def_parse_field P((int, char **, int, NODE *, +static long def_parse_field P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); -static long sc_parse_field P((int, char **, int, NODE *, +static long null_parse_field P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); -static long fw_parse_field P((int, char **, int, NODE *, +static long sc_parse_field P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); -static void set_element P((int, char *, int, NODE *)); +static long fw_parse_field P((long, char **, int, NODE *, + Regexp *, Setfunc, NODE *)); +static void set_element P((long num, char * str, long len, NODE *arr)); static void grow_fields_arr P((long num)); -static void set_field P((int num, char *str, int len, NODE *dummy)); +static void set_field P((long num, char *str, long len, NODE *dummy)); -static Regexp *FS_regexp = NULL; static char *parse_extent; /* marks where to restart parse of record */ -static long parse_high_water=0; /* field number that we have parsed so far */ +static long parse_high_water = 0; /* field number that we have parsed so far */ static long nf_high_water = 0; /* size of fields_arr */ static int resave_fs; static NODE *save_FS; /* save current value of FS when line is read, * to be used in deferred parsing */ +static NODE **nodes; /* permanent repository of field nodes */ +static int *FIELDWIDTHS = NULL; NODE **fields_arr; /* array of pointers to the field nodes */ int field0_valid; /* $(>0) has not been changed yet */ -int default_FS; -static NODE **nodes; /* permanent repository of field nodes */ -static int *FIELDWIDTHS = NULL; +int default_FS; /* 1 when FS == " " */ +Regexp *FS_regexp = NULL; + +/* init_fields --- set up the fields array to start with */ void init_fields() @@ -67,12 +71,14 @@ init_fields() emalloc(nodes, NODE **, sizeof(NODE *), "init_fields"); getnode(n); *n = *Nnull_string; + n->flags |= SCALAR; fields_arr[0] = nodes[0] = n; parse_extent = fields_arr[0]->stptr; save_FS = dupnode(FS_node->var_value); - field0_valid = 1; + field0_valid = TRUE; } +/* grow_fields --- acquire new fields as needed */ static void grow_fields_arr(num) @@ -81,22 +87,25 @@ long num; register int t; register NODE *n; - erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); - erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field"); - for (t = nf_high_water+1; t <= num; t++) { + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr"); + erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "grow_fields_arr"); + for (t = nf_high_water + 1; t <= num; t++) { getnode(n); *n = *Nnull_string; + n->flags |= SCALAR; fields_arr[t] = nodes[t] = n; } nf_high_water = num; } +/* set_field --- set the value of a particular field */ + /*ARGSUSED*/ static void set_field(num, str, len, dummy) -int num; +long num; char *str; -int len; +long len; NODE *dummy; /* not used -- just to make interface same as set_element */ { register NODE *n; @@ -110,30 +119,34 @@ NODE *dummy; /* not used -- just to make interface same as set_element */ fields_arr[num] = n; } -/* Someone assigned a value to $(something). Fix up $0 to be right */ +/* rebuild_record --- Someone assigned a value to $(something). + Fix up $0 to be right */ + static void rebuild_record() { - register size_t tlen; + /* + * use explicit unsigned longs for lengths, in case + * a size_t isn't big enough. + */ + register unsigned long tlen; + register unsigned long ofslen; register NODE *tmp; NODE *ofs; char *ops; register char *cops; register NODE **ptr; - register size_t ofslen; tlen = 0; ofs = force_string(OFS_node->var_value); ofslen = ofs->stlen; - ptr = &fields_arr[NF]; - while (ptr > &fields_arr[0]) { + for (ptr = &fields_arr[NF]; ptr > &fields_arr[0]; ptr--) { tmp = force_string(*ptr); tlen += tmp->stlen; - ptr--; } tlen += (NF - 1) * ofslen; - if ((long)tlen < 0) - tlen = 0; + if ((long) tlen < 0) + tlen = 0; emalloc(ops, char *, tlen + 2, "rebuild_record"); cops = ops; ops[0] = '\0'; @@ -157,10 +170,11 @@ rebuild_record() tmp = make_str_node(ops, tlen, ALREADY_MALLOCED); unref(fields_arr[0]); fields_arr[0] = tmp; - field0_valid = 1; + field0_valid = TRUE; } /* + * set_record: * setup $0, but defer parsing rest of line until reference is made to $(>0) * or to NF. At that point, parse only as much as necessary. */ @@ -173,17 +187,21 @@ int freeold; register int i; NF = -1; - for (i = 1; i <= parse_high_water; i++) { + for (i = 1; i <= parse_high_water; i++) unref(fields_arr[i]); - } + parse_high_water = 0; + /* + * $0 = $0 should resplit using the current value of FS, thus, + * this if is executed orthogonally to the value of freeold. + */ + if (resave_fs) { + resave_fs = FALSE; + unref(save_FS); + save_FS = dupnode(FS_node->var_value); + } if (freeold) { unref(fields_arr[0]); - if (resave_fs) { - resave_fs = 0; - unref(save_FS); - save_FS = dupnode(FS_node->var_value); - } nodes[0]->stptr = buf; nodes[0]->stlen = cnt; nodes[0]->stref = 1; @@ -191,16 +209,20 @@ int freeold; fields_arr[0] = nodes[0]; } fields_arr[0]->flags |= MAYBE_NUM; - field0_valid = 1; + field0_valid = TRUE; } +/* reset_record --- start over again with current $0 */ + void reset_record() { (void) force_string(fields_arr[0]); - set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0); + set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, FALSE); } +/* set_NF --- handle what happens to $0 and fields when NF is changed */ + void set_NF() { @@ -213,17 +235,19 @@ set_NF() unref(fields_arr[i]); fields_arr[i] = Nnull_string; } - field0_valid = 0; + field0_valid = FALSE; } /* - * this is called both from get_field() and from do_split() + * re_parse_field --- parse fields using a regexp. + * + * This is called both from get_field() and from do_split() * via (*parse_field)(). This variation is for when FS is a regular * expression -- either user-defined or because RS=="" and FS==" " */ static long re_parse_field(up_to, buf, len, fs, rp, set, n) -int up_to; /* parse only up to this field number */ +long up_to; /* parse only up to this field number */ char **buf; /* on input: string to parse; on output: point to start next */ int len; NODE *fs; @@ -232,7 +256,7 @@ Setfunc set; /* routine to set the value of the parsed field */ NODE *n; { register char *scan = *buf; - register int nf = parse_high_water; + register long nf = parse_high_water; register char *field; register char *end = scan + len; @@ -241,31 +265,31 @@ NODE *n; if (len == 0) return nf; - if (*RS == 0 && default_FS) + if (RS_is_null && default_FS) while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) scan++; field = scan; while (scan < end - && research(rp, scan, 0, (end - scan), 1) != -1 + && research(rp, scan, 0, (end - scan), TRUE) != -1 && nf < up_to) { if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ scan++; if (scan == end) { - (*set)(++nf, field, (int)(scan - field), n); + (*set)(++nf, field, (long)(scan - field), n); up_to = nf; break; } continue; } (*set)(++nf, field, - (int)(scan + RESTART(rp, scan) - field), n); + (long)(scan + RESTART(rp, scan) - field), n); scan += REEND(rp, scan); field = scan; if (scan == end) /* FS at end of record */ - (*set)(++nf, field, 0, n); + (*set)(++nf, field, 0L, n); } if (nf != up_to && scan < end) { - (*set)(++nf, scan, (int)(end - scan), n); + (*set)(++nf, scan, (long)(end - scan), n); scan = end; } *buf = scan; @@ -273,13 +297,15 @@ NODE *n; } /* - * this is called both from get_field() and from do_split() + * def_parse_field --- default field parsing. + * + * This is called both from get_field() and from do_split() * via (*parse_field)(). This variation is for when FS is a single space * character. */ static long def_parse_field(up_to, buf, len, fs, rp, set, n) -int up_to; /* parse only up to this field number */ +long up_to; /* parse only up to this field number */ char **buf; /* on input: string to parse; on output: point to start next */ int len; NODE *fs; @@ -288,7 +314,7 @@ Setfunc set; /* routine to set the value of the parsed field */ NODE *n; { register char *scan = *buf; - register int nf = parse_high_water; + register long nf = parse_high_water; register char *field; register char *end = scan + len; char sav; @@ -324,7 +350,7 @@ NODE *n; field = scan; while (*scan != ' ' && *scan != '\t') scan++; - (*set)(++nf, field, (int)(scan - field), n); + (*set)(++nf, field, (long)(scan - field), n); if (scan == end) break; } @@ -337,13 +363,47 @@ NODE *n; } /* - * this is called both from get_field() and from do_split() + * null_parse_field --- each character is a separate field + * + * This is called both from get_field() and from do_split() + * via (*parse_field)(). This variation is for when FS is the null string. + */ +static long +null_parse_field(up_to, buf, len, fs, rp, set, n) +long up_to; /* parse only up to this field number */ +char **buf; /* on input: string to parse; on output: point to start next */ +int len; +NODE *fs; +Regexp *rp; +Setfunc set; /* routine to set the value of the parsed field */ +NODE *n; +{ + register char *scan = *buf; + register long nf = parse_high_water; + register char *end = scan + len; + + if (up_to == HUGE) + nf = 0; + if (len == 0) + return nf; + + for (; nf < up_to && scan < end; scan++) + (*set)(++nf, scan, 1L, n); + + *buf = scan; + return nf; +} + +/* + * sc_parse_field --- single character field separator + * + * This is called both from get_field() and from do_split() * via (*parse_field)(). This variation is for when FS is a single character * other than space. */ static long sc_parse_field(up_to, buf, len, fs, rp, set, n) -int up_to; /* parse only up to this field number */ +long up_to; /* parse only up to this field number */ char **buf; /* on input: string to parse; on output: point to start next */ int len; NODE *fs; @@ -353,9 +413,10 @@ NODE *n; { register char *scan = *buf; register char fschar; - register int nf = parse_high_water; + register long nf = parse_high_water; register char *field; register char *end = scan + len; + int onecase; char sav; if (up_to == HUGE) @@ -363,11 +424,15 @@ NODE *n; if (len == 0) return nf; - if (*RS == 0 && fs->stlen == 0) + if (RS_is_null && fs->stlen == 0) fschar = '\n'; else fschar = fs->stptr[0]; + onecase = (IGNORECASE && isalpha(fschar)); + if (onecase) + fschar = casetable[fschar]; + /* before doing anything save the char at *end */ sav = *end; /* because it will be destroyed now: */ @@ -375,14 +440,19 @@ NODE *n; for (; nf < up_to;) { field = scan; - while (*scan != fschar) - scan++; - (*set)(++nf, field, (int)(scan - field), n); + if (onecase) { + while (casetable[*scan] != fschar) + scan++; + } else { + while (*scan != fschar) + scan++; + } + (*set)(++nf, field, (long)(scan - field), n); if (scan == end) break; scan++; if (scan == end) { /* FS at end of record */ - (*set)(++nf, field, 0, n); + (*set)(++nf, field, 0L, n); break; } } @@ -395,12 +465,14 @@ NODE *n; } /* - * this is called both from get_field() and from do_split() + * fw_parse_field --- field parsing using FIELDWIDTHS spec + * + * This is called both from get_field() and from do_split() * via (*parse_field)(). This variation is for fields are fixed widths. */ static long fw_parse_field(up_to, buf, len, fs, rp, set, n) -int up_to; /* parse only up to this field number */ +long up_to; /* parse only up to this field number */ char **buf; /* on input: string to parse; on output: point to start next */ int len; NODE *fs; @@ -419,7 +491,7 @@ NODE *n; for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { if (len > end - scan) len = end - scan; - (*set)(++nf, scan, len, n); + (*set)(++nf, scan, (long) len, n); scan += len; } if (len == -1) @@ -429,9 +501,11 @@ NODE *n; return nf; } +/* get_field --- return a particular $n */ + NODE ** get_field(requested, assign) -register int requested; +register long requested; Func_ptr *assign; /* this field is on the LHS of an assign */ { /* @@ -439,27 +513,28 @@ Func_ptr *assign; /* this field is on the LHS of an assign */ * then the whole line must be rebuilt */ if (requested == 0) { - if (!field0_valid) { + if (! field0_valid) { /* first, parse remainder of input record */ if (NF == -1) { NF = (*parse_field)(HUGE-1, &parse_extent, fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr), save_FS, FS_regexp, set_field, - (NODE *)NULL); + (NODE *) NULL); parse_high_water = NF; } rebuild_record(); + reset_record(); /* clear out fields array */ } - if (assign) + if (assign != NULL) *assign = reset_record; return &fields_arr[0]; } /* assert(requested > 0); */ - if (assign) - field0_valid = 0; /* $0 needs reconstruction */ + if (assign != NULL) + field0_valid = FALSE; /* $0 needs reconstruction */ if (requested <= parse_high_water) /* already parsed this field */ return &fields_arr[requested]; @@ -472,11 +547,11 @@ Func_ptr *assign; /* this field is on the LHS of an assign */ if (parse_high_water == 0) /* starting at the beginning */ parse_extent = fields_arr[0]->stptr; parse_high_water = (*parse_field)(requested, &parse_extent, - fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr), - save_FS, FS_regexp, set_field, (NODE *)NULL); + fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr), + save_FS, FS_regexp, set_field, (NODE *) NULL); /* - * if we reached the end of the record, set NF to the number of + * if we reached the end of the record, set NF to the number of * fields so far. Note that requested might actually refer to * a field that is beyond the end of the record, but we won't * set NF to that value at this point, since this is only a @@ -489,7 +564,7 @@ Func_ptr *assign; /* this field is on the LHS of an assign */ requested = parse_high_water; } if (parse_high_water < requested) { /* requested beyond end of record */ - if (assign) { /* expand record */ + if (assign != NULL) { /* expand record */ register int i; if (requested > nf_high_water) @@ -508,11 +583,13 @@ Func_ptr *assign; /* this field is on the LHS of an assign */ return &fields_arr[requested]; } +/* set_element --- set an array element, used by do_split() */ + static void set_element(num, s, len, n) -int num; +long num; char *s; -int len; +long len; NODE *n; { register NODE *it; @@ -522,6 +599,8 @@ NODE *n; *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it; } +/* do_split --- implement split(), semantics are same as for field splitting */ + NODE * do_split(tree) NODE *tree; @@ -529,7 +608,7 @@ NODE *tree; NODE *t1, *t2, *t3, *tmp; NODE *fs; char *s; - long (*parseit)P((int, char **, int, NODE *, + long (*parseit) P((long, char **, int, NODE *, Regexp *, Setfunc, NODE *)); Regexp *rp = NULL; @@ -538,7 +617,7 @@ NODE *tree; * do dupnode(), to avoid problems like * x = split(a[1], a, "blah") * since we assoc_clear the array. gack. - * this also gives up complete call by value semantics. + * this also gives us complete call by value semantics. */ tmp = tree_eval(tree->lnode); t1 = dupnode(tmp); @@ -552,7 +631,7 @@ NODE *tree; if (t2->type == Node_param_list) t2 = stack_ptr[t2->param_cnt]; if (t2->type != Node_var && t2->type != Node_var_array) - fatal("second argument of split is not a variable"); + fatal("second argument of split is not an array"); assoc_clear(t2); if (t3->re_flags & FS_DFLT) { @@ -561,7 +640,9 @@ NODE *tree; rp = FS_regexp; } else { tmp = force_string(tree_eval(t3->re_exp)); - if (tmp->stlen == 1) { + if (tmp->stlen == 0) + parseit = null_parse_field; + else if (tmp->stlen == 1) { if (tmp->stptr[0] == ' ') parseit = def_parse_field; else @@ -574,75 +655,14 @@ NODE *tree; } s = t1->stptr; - tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen, + tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) t1->stlen, fs, rp, set_element, t2)); unref(t1); free_temp(t3); return tmp; } -void -set_FS() -{ - char buf[10]; - NODE *fs; - - /* - * If changing the way fields are split, obey least-suprise - * semantics, and force $0 to be split totally. - */ - if (fields_arr != NULL) - (void) get_field(HUGE - 1, 0); - - buf[0] = '\0'; - default_FS = 0; - if (FS_regexp) { - refree(FS_regexp); - FS_regexp = NULL; - } - fs = force_string(FS_node->var_value); - if (fs->stlen > 1) - parse_field = re_parse_field; - else if (*RS == 0) { - parse_field = sc_parse_field; - if (fs->stlen == 1) { - if (fs->stptr[0] == ' ') { - default_FS = 1; - strcpy(buf, "[ \t\n]+"); - } else if (fs->stptr[0] != '\n') - sprintf(buf, "[%c\n]", fs->stptr[0]); - } - } else { - parse_field = def_parse_field; - if (fs->stptr[0] == ' ' && fs->stlen == 1) - default_FS = 1; - else if (fs->stptr[0] != ' ' && fs->stlen == 1) { - if (IGNORECASE == 0) - parse_field = sc_parse_field; - else if (fs->stptr[0] == '\\') - /* yet another special case */ - strcpy(buf, "[\\\\]"); - else - sprintf(buf, "[%c]", fs->stptr[0]); - } - } - if (buf[0]) { - FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, 1); - parse_field = re_parse_field; - } else if (parse_field == re_parse_field) { - FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, 1); - } else - FS_regexp = NULL; - resave_fs = 1; -} - -void -set_RS() -{ - (void) force_string(RS_node->var_value); - RS = RS_node->var_value->stptr; - set_FS(); -} +/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */ void set_FIELDWIDTHS() @@ -651,14 +671,14 @@ set_FIELDWIDTHS() char *end; register int i; static int fw_alloc = 1; - static int warned = 0; + static int warned = FALSE; extern double strtod(); if (do_lint && ! warned) { - warned = 1; + warned = TRUE; warning("use of FIELDWIDTHS is a gawk extension"); } - if (do_unix) /* quick and dirty, does the trick */ + if (do_traditional) /* quick and dirty, does the trick */ return; /* @@ -693,3 +713,79 @@ set_FS_if_not_FIELDWIDTHS() if (parse_field != fw_parse_field) set_FS(); } + +/* set_FS --- handle things when FS is assigned to */ + +void +set_FS() +{ + char buf[10]; + NODE *fs; + static NODE *save_fs = NULL; + static NODE *save_rs = NULL; + + /* + * If changing the way fields are split, obey least-suprise + * semantics, and force $0 to be split totally. + */ + if (fields_arr != NULL) + (void) get_field(HUGE - 1, 0); + + if (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0 + && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) + return; + unref(save_fs); + save_fs = dupnode(FS_node->var_value); + unref(save_rs); + save_rs = dupnode(RS_node->var_value); + resave_fs = TRUE; + buf[0] = '\0'; + default_FS = FALSE; + if (FS_regexp) { + refree(FS_regexp); + FS_regexp = NULL; + } + fs = force_string(FS_node->var_value); + if (! do_traditional && fs->stlen == 0) + parse_field = null_parse_field; + else if (fs->stlen > 1) + parse_field = re_parse_field; + else if (RS_is_null) { + parse_field = sc_parse_field; + if (fs->stlen == 1) { + if (fs->stptr[0] == ' ') { + default_FS = TRUE; + strcpy(buf, "[ \t\n]+"); + } else if (fs->stptr[0] != '\n') + sprintf(buf, "[%c\n]", fs->stptr[0]); + } + } else { + parse_field = def_parse_field; + if (fs->stptr[0] == ' ' && fs->stlen == 1) + default_FS = TRUE; + else if (fs->stptr[0] != ' ' && fs->stlen == 1) { + if (! IGNORECASE) + parse_field = sc_parse_field; + else if (fs->stptr[0] == '\\') + /* yet another special case */ + strcpy(buf, "[\\\\]"); + else + sprintf(buf, "[%c]", fs->stptr[0]); + } + } + if (buf[0] != '\0') { + FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, TRUE); + parse_field = re_parse_field; + } else if (parse_field == re_parse_field) { + FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, TRUE); + } else + FS_regexp = NULL; +} + +/* using_fieldwidths --- is FS or FIELDWIDTHS in use? */ + +int +using_fieldwidths() +{ + return parse_field == fw_parse_field; +} -- cgit v1.2.1