summaryrefslogtreecommitdiff
path: root/sed/sed.h
blob: 7afa071b4d20dc1bc8dc078bcfd3390d2079cb98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*  GNU SED, a batch stream editor.
    Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
    Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "basicdefs.h"
#include "regex.h"

#ifndef BOOTSTRAP
#include <stdio.h>
#include "unlocked-io.h"
#endif

#include "utils.h"

/* Struct vector is used to describe a compiled sed program. */
struct vector {
  struct sed_cmd *v;	/* a dynamically allocated array */
  size_t v_allocated;	/* ... number slots allocated */
  size_t v_length;	/* ... number of slots in use */
};

/* This structure tracks files used by sed so that they may all be
   closed cleanly at normal program termination.  A flag is kept that tells
   if a missing newline was encountered, so that it is added on the
   next line and the two lines are not concatenated.  */
struct output {
  char *name;
  bool missing_newline;
  FILE *fp;
  struct output *link;
};

struct text_buf {
  char *text;
  size_t text_length;
};

struct regex {
  regex_t pattern;
  int flags;
  size_t sz;
  char re[1];
};
  
enum replacement_types {
  REPL_ASIS = 0,
  REPL_UPPERCASE = 1,
  REPL_LOWERCASE = 2,
  REPL_UPPERCASE_FIRST = 4,
  REPL_LOWERCASE_FIRST = 8,
  REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,

  /* These are given to aid in debugging */
  REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
  REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
  REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
  REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
};

enum text_types {
  TEXT_BUFFER,
  TEXT_REPLACEMENT,
  TEXT_REGEX
};

enum posixicity_types {
  POSIXLY_EXTENDED,	/* with GNU extensions */
  POSIXLY_CORRECT,	/* with POSIX-compatible GNU extensions */
  POSIXLY_BASIC		/* pedantically POSIX */
};

enum addr_state {
  RANGE_INACTIVE,	/* never been active */
  RANGE_ACTIVE,		/* between first and second address */
  RANGE_CLOSED		/* like RANGE_INACTIVE, but range has ended once */
};

enum addr_types {
  ADDR_IS_NULL,		/* null address */
  ADDR_IS_REGEX,	/* a.addr_regex is valid */
  ADDR_IS_NUM,		/* a.addr_number is valid */
  ADDR_IS_NUM_MOD,	/* a.addr_number is valid, addr_step is modulo */
  ADDR_IS_STEP,		/* address is +N (only valid for addr2) */
  ADDR_IS_STEP_MOD,	/* address is ~N (only valid for addr2) */
  ADDR_IS_LAST		/* address is $ */
};

struct addr {
  enum addr_types addr_type;
  countT addr_number;
  countT addr_step;
  struct regex *addr_regex;
};


struct replacement {
  char *prefix;
  size_t prefix_length;
  int subst_id;
  enum replacement_types repl_type;
  struct replacement *next;
};

struct subst {
  struct regex *regx;
  struct replacement *replacement;
  countT numb;		/* if >0, only substitute for match number "numb" */
  struct output *outf;	/* 'w' option given */
  unsigned global : 1;	/* 'g' option given */
  unsigned print : 2;	/* 'p' option given (before/after eval) */
  unsigned eval : 1;	/* 'e' option given */
  unsigned max_id : 4;  /* maximum backreference on the RHS */
};

#ifdef REG_PERL
/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};
#endif



struct sed_cmd {
  struct addr *a1;	/* save space: usually is NULL */
  struct addr *a2;

  /* See description the enum, above.  */
  enum addr_state range_state;

  /* Non-zero if command is to be applied to non-matches. */
  char addr_bang;

  /* The actual command character. */
  char cmd;

  /* auxiliary data for various commands */
  union {
    /* This structure is used for a, i, and c commands. */
    struct text_buf cmd_txt;

    /* This is used for the l, q and Q commands. */
    int int_arg;

    /* This is used for the {}, b, and t commands. */
    countT jump_index;

    /* This is used for the r command. */
    char *fname;

    /* This is used for the hairy s command. */
    struct subst *cmd_subst;

    /* This is used for the w command. */
    struct output *outf;

    /* This is used for the R command. */
    FILE *fp;

    /* This is used for the y command. */
    unsigned char *translate;
    char **translatemb;
  } x;
};



void bad_prog P_((const char *why));
size_t normalize_text P_((char *text, size_t len, enum text_types buftype));
struct vector *compile_string P_((struct vector *, char *str, size_t len));
struct vector *compile_file P_((struct vector *, const char *cmdfile));
void check_final_program P_((struct vector *));
void rewind_read_files P_((void));
void finish_program P_((struct vector *));

struct regex *compile_regex P_((struct buffer *b, int flags, int needed_sub));
int match_regex P_((struct regex *regex,
		    char *buf, size_t buflen, size_t buf_start_offset,
		    struct re_registers *regarray, int regsize));
#ifdef DEBUG_LEAKS
void release_regex P_((struct regex *));
#endif

int process_files P_((struct vector *, char **argv));

int main P_((int, char **));

extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file));

extern int extended_regexp_flags;

/* If set, fflush(stdout) on every line output. */
extern bool unbuffered_output;

/* If set, don't write out the line unless explicitly told to. */
extern bool no_default_output;

/* If set, reset line counts on every new file. */
extern bool separate_files;

/* If set, follow symlinks when invoked with -i option */
extern bool follow_symlinks;

/* Do we need to be pedantically POSIX compliant? */
extern enum posixicity_types posixicity;

/* How long should the `l' command's output line be? */
extern countT lcmd_out_line_len;

/* How do we edit files in-place? (we don't if NULL) */
extern char *in_place_extension;

/* The mode to use to read files, either "rt" or "rb".  */
extern char *read_mode;

/* Should we use EREs? */
extern bool use_extended_syntax_p;

/* Declarations for multibyte character sets.  */
extern int mb_cur_max;
extern bool is_utf8;

#ifdef HAVE_MBRTOWC
#ifdef HAVE_BTOWC
#define MBRTOWC(pwc, s, n, ps) \
  (mb_cur_max == 1 ? \
   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
   mbrtowc ((pwc), (s), (n), (ps)))

#define WCRTOMB(s, wc, ps) \
  (mb_cur_max == 1 ? \
   (*(s) = wctob ((wint_t) (wc)), 1) : \
   wcrtomb ((s), (wc), (ps)))
#else
#define MBRTOWC(pwc, s, n, ps) \
  mbrtowc ((pwc), (s), (n), (ps))

#define WCRTOMB(s, wc, ps) \
  wcrtomb ((s), (wc), (ps))
#endif

#define MBRLEN(s, n, ps) \
  (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps))

#define BRLEN(ch, ps) \
  (mb_cur_max == 1 ? 1 : brlen (ch, ps))

#else
#define MBRLEN(s, n, ps) 1
#define BRLEN(ch, ps) 1
#endif

extern int brlen P_ ((int ch, mbstate_t *ps));
extern void initialize_mbcs P_ ((void));