diff options
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | awk.h | 2 | ||||
-rw-r--r-- | field.c | 6 | ||||
-rw-r--r-- | io.c | 51 |
4 files changed, 46 insertions, 28 deletions
@@ -19,6 +19,21 @@ that one may dump SYMTAB or FUNCTAB. Issue reported by zhou shuiqing <zhoushuiqing321@outlook.com>. +2023-03-18 Miguel Pineiro Jr. <mpj@pineiro.cc> + + Allow records longer than INT_MAX. For test cases, + see https://lists.gnu.org/archive/html/bug-gawk/2021-05/msg00003.html. + + * awk.h (set_record): `cnt' is now size_t. + * field.c (set_record): `cnt' is now size_t. Adjust databuf_size + and MAX_SIZE macro. + * io.c (get_a_record): Add new second parameter, size_t *len, + which holds the size. Adjust return value meaning. Deal with + return from an extension's get_a_record() function. + (do_getline_redir): Adjust calling get_a_record(). + (do_getline): Ditto. + (inrec): Ditto. + 2023-03-09 Arnold D. Robbins <arnold@skeeve.com> * gawkapi.h: Update copyright year. Small edit in leading comment. @@ -1569,7 +1569,7 @@ extern NODE *get_actual_argument(NODE *, int, bool); #endif /* field.c */ extern void init_fields(void); -extern void set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *); +extern void set_record(const char *buf, size_t cnt, const awk_fieldwidth_info_t *); extern void reset_record(void); extern void rebuild_record(void); extern void set_NF(void); @@ -261,13 +261,13 @@ rebuild_record() * but better correct than fast. */ void -set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *fw) +set_record(const char *buf, size_t cnt, const awk_fieldwidth_info_t *fw) { NODE *n; static char *databuf; - static unsigned long databuf_size; + static size_t databuf_size; #define INITIAL_SIZE 512 -#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */ +#define MAX_SIZE ((size_t) ~0) /* maximally portable ... */ purge_record(); @@ -267,7 +267,7 @@ static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan; -static int get_a_record(char **out, IOBUF *iop, int *errcode, const awk_fieldwidth_info_t **field_width); +static int get_a_record(char **out, size_t *len, IOBUF *iop, int *errcode, const awk_fieldwidth_info_t **field_width); static void free_rp(struct redirect *rp); @@ -565,21 +565,19 @@ bool inrec(IOBUF *iop, int *errcode) { char *begin; - int cnt; - bool retval = true; + size_t cnt; + bool retval; const awk_fieldwidth_info_t *field_width = NULL; if (at_eof(iop) && no_data_left(iop)) - cnt = EOF; + retval = false; else if ((iop->flag & IOP_CLOSED) != 0) - cnt = EOF; + retval = false; else - cnt = get_a_record(& begin, iop, errcode, & field_width); + /* Note that get_a_record may return -2 when I/O would block */ + retval = (get_a_record(& begin, & cnt, iop, errcode, & field_width) == 0); - /* Note that get_a_record may return -2 when I/O would block */ - if (cnt < 0) { - retval = false; - } else { + if (retval) { INCREMENT_REC(NR); INCREMENT_REC(FNR); set_record(begin, cnt, field_width); @@ -2808,7 +2806,8 @@ do_getline_redir(int into_variable, enum redirval redirtype) { struct redirect *rp = NULL; IOBUF *iop; - int cnt = EOF; + size_t cnt; + int retval = EOF; char *s = NULL; int errcode; NODE *redir_exp = NULL; @@ -2843,14 +2842,14 @@ do_getline_redir(int into_variable, enum redirval redirtype) return make_number((AWKNUM) 0.0); errcode = 0; - cnt = get_a_record(& s, iop, & errcode, (lhs ? NULL : & field_width)); + retval = get_a_record(& s, & cnt, iop, & errcode, (lhs ? NULL : & field_width)); if (errcode != 0) { if (! do_traditional && (errcode != -1)) update_ERRNO_int(errcode); - return make_number((AWKNUM) cnt); + return make_number((AWKNUM) retval); } - if (cnt == EOF) { + if (retval == EOF) { /* * Don't do iop_close() here if we are * reading from a pipe; otherwise @@ -2882,7 +2881,8 @@ do_getline_redir(int into_variable, enum redirval redirtype) NODE * do_getline(int into_variable, IOBUF *iop) { - int cnt = EOF; + size_t cnt; + int retval = EOF; char *s = NULL; int errcode; const awk_fieldwidth_info_t *field_width = NULL; @@ -2894,16 +2894,16 @@ do_getline(int into_variable, IOBUF *iop) } errcode = 0; - cnt = get_a_record(& s, iop, & errcode, (into_variable ? NULL : & field_width)); + retval = get_a_record(& s, & cnt, iop, & errcode, (into_variable ? NULL : & field_width)); if (errcode != 0) { if (! do_traditional && (errcode != -1)) update_ERRNO_int(errcode); if (into_variable) (void) POP_ADDRESS(); - return make_number((AWKNUM) cnt); + return make_number((AWKNUM) retval); } - if (cnt == EOF) + if (retval == EOF) return NULL; /* try next file */ INCREMENT_REC(NR); INCREMENT_REC(FNR); @@ -3851,13 +3851,14 @@ errno_io_retry(void) /* * get_a_record --- read a record from IOP into out, - * return length or EOF, set RT. + * its length into len, and set RT. + * return 0 on success, EOF when out of data, and -2 if I/O would block. * Note that errcode is never NULL, and the caller initializes *errcode to 0. - * If I/O would block, return -2. */ static int get_a_record(char **out, /* pointer to pointer to data */ + size_t *len, /* pointer to record length */ IOBUF *iop, /* input IOP */ int *errcode, /* pointer to error variable */ const awk_fieldwidth_info_t **field_width) @@ -3866,7 +3867,6 @@ get_a_record(char **out, /* pointer to pointer to data */ struct recmatch recm; SCANSTATE state; RECVALUE ret; - int retval; NODE *rtval = NULL; static RECVALUE (*lastmatchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = NULL; @@ -3885,6 +3885,9 @@ get_a_record(char **out, /* pointer to pointer to data */ if (rc == EOF) iop->flag |= IOP_AT_EOF; else { + assert(rc >= 0); + *len = rc; + rc = 0; if (rt_len != 0) set_RT(rt_start, rt_len); else @@ -4044,11 +4047,11 @@ get_a_record(char **out, /* pointer to pointer to data */ if (recm.len == 0) { *out = NULL; - retval = 0; + *len = 0; } else { assert(recm.start != NULL); *out = recm.start; - retval = recm.len; + *len = recm.len; } iop->off += recm.len + recm.rt_len; @@ -4056,7 +4059,7 @@ get_a_record(char **out, /* pointer to pointer to data */ if (recm.len == 0 && recm.rt_len == 0 && at_eof(iop)) return EOF; else - return retval; + return 0; } /* set_RS --- update things as appropriate when RS is set */ |