summaryrefslogtreecommitdiff
path: root/gettext-tools/src/xgettext.h
blob: 167fcd3732757cbcce26f4d9229f6df1ed866177 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
/* xgettext common functions.
   Copyright (C) 2001-2003, 2005-2006, 2008-2009, 2011, 2015 Free
   Software Foundation, Inc.
   Written by Peter Miller <millerp@canb.auug.org.au>
   and Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef _XGETTEXT_H
#define _XGETTEXT_H

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#if HAVE_ICONV
#include <iconv.h>
#endif

#include "message.h"
#include "pos.h"
#include "str-list.h"

/* Declare 'line_comment' and 'input_syntax'.  */
#include "read-catalog.h"


#ifdef __cplusplus
extern "C" {
#endif


/* If true, omit the header entry.
   If false, keep the header entry present in the input.
   Note: declared as 'int', but treated as a boolean flag.  */
extern int xgettext_omit_header;

/* NOTE(review): the purpose of this flag is not documented in this header;
   see the file that defines it before relying on its semantics.  */
extern bool substring_match;


/* Calling convention for a given keyword.
   Describes which arguments of a call to the keyword carry the msgid,
   the msgid_plural and the msgctxt, plus related per-shape options.  */
struct callshape
{
  int argnum1; /* argument number to use for msgid */
  int argnum2; /* argument number to use for msgid_plural */
  int argnumc; /* argument number to use for msgctxt */
  bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */
  bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */
  int argtotal; /* total number of arguments */
  string_list_ty xcomments; /* auto-extracted comments */
};

/* Split keyword spec into keyword, argnum1, argnum2, argnumc.
   SPEC is the keyword specification to parse.
   The parsed calling convention is stored through SHAPEP.
   A position inside SPEC is stored through ENDP; NOTE(review): presumably
   the end of the keyword part of SPEC -- confirm against the definition.  */
extern void split_keywordspec (const char *spec, const char **endp,
                               struct callshape *shapep);

/* Set of alternative calling conventions for a given keyword.
   This is a variable-sized structure: 'shapes' is declared with a single
   element but actually holds 'nshapes' elements.  */
struct callshapes
{
  const char *keyword;          /* the keyword, not NUL terminated */
  size_t keyword_len;           /* the keyword's length */
  size_t nshapes;               /* number of elements in shapes[] */
  struct callshape shapes[1];   /* actually nshapes elements */
};

/* Insert a (keyword, callshape) pair into a hash table mapping keyword to
   'struct callshapes *'.
   TABLE is the hash table to update.
   KEYWORD is passed together with its length KEYWORD_LEN.
   SHAPE is the calling convention to add; it is only read (const).  */
extern void insert_keyword_callshape (hash_table *table,
                                      const char *keyword, size_t keyword_len,
                                      const struct callshape *shape);


/* Context representing some flags.
   For each of three formatstring types (primary, secondary, tertiary) it
   holds a 3-bit is_format value (an 'enum is_format' stored as unsigned
   int) and a 1-bit pass_format flag (a boolean stored as unsigned int).  */
typedef struct flag_context_ty flag_context_ty;
struct flag_context_ty
{
  /* Regarding the primary formatstring type.  */
  /*enum is_format*/ unsigned int is_format1    : 3;
  /*bool*/           unsigned int pass_format1  : 1;
  /* Regarding the secondary formatstring type.  */
  /*enum is_format*/ unsigned int is_format2    : 3;
  /*bool*/           unsigned int pass_format2  : 1;
  /* Regarding the tertiary formatstring type.  */
  /*enum is_format*/ unsigned int is_format3    : 3;
  /*bool*/           unsigned int pass_format3  : 1;
};
/* Null context.  */
extern flag_context_ty null_context;
/* Transparent context.  */
extern flag_context_ty passthrough_context;
/* Compute an inherited context.
   The outer_context is assumed to have all pass_format* flags = false.
   The result will then also have all pass_format* flags = false.
   Both contexts are passed, and the result is returned, by value.  */
extern flag_context_ty
       inherited_context (flag_context_ty outer_context,
                          flag_context_ty modifier_context);

/* Context representing some flags, for each possible argument number.
   This is a linked list, sorted according to the argument number.
   Each node holds the flags for one argument number.  */
typedef struct flag_context_list_ty flag_context_list_ty;
struct flag_context_list_ty
{
  int argnum;                   /* current argument number, > 0 */
  flag_context_ty flags;        /* flags for current argument */
  flag_context_list_ty *next;   /* next node of the list */
};

/* Iterator through a flag_context_list_ty.  */
typedef struct flag_context_list_iterator_ty flag_context_list_iterator_ty;
struct flag_context_list_iterator_ty
{
  int argnum;                           /* current argument number, > 0 */
  const flag_context_list_ty* head;     /* tail of list (despite the field's
                                           name, 'head') */
};
/* Predefined iterators.  NOTE(review): by their names these presumably
   correspond to null_context and passthrough_context respectively --
   confirm against the definitions.  */
extern flag_context_list_iterator_ty null_context_list_iterator;
extern flag_context_list_iterator_ty passthrough_context_list_iterator;
/* Create an iterator for LIST.  The iterator is returned by value.  */
extern flag_context_list_iterator_ty
       flag_context_list_iterator (flag_context_list_ty *list);
/* Advance the iterator *ITER and return a flag context.
   NOTE(review): presumably the flags for the argument the iterator was
   positioned at before advancing -- confirm against the definition.  */
extern flag_context_ty
       flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter);

/* For nearly each backend, we have a separate table mapping a keyword to
   a flag_context_list_ty *.  */
typedef hash_table /* char[] -> flag_context_list_ty * */
        flag_context_list_table_ty;
/* Look up the key KEY of length KEYLEN in FLAG_TABLE and return a
   flag_context_list_ty pointer.
   NOTE(review): the result for a key that is not present is not visible
   here (presumably NULL) -- confirm against the definition.  */
extern flag_context_list_ty *
       flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
                                       const void *key, size_t keylen);
/* Record a flag in the appropriate backend's table.
   OPTIONSTRING is the flag specification; its syntax is not described in
   this header.  */
extern void xgettext_record_flag (const char *optionstring);


/* Context while building up lexical tokens.
   It is passed to the encoding conversion functions, where it is used for
   error message purposes.  */
typedef enum
  {
    lc_outside, /* Initial context: outside of comments and strings.  */
    lc_comment, /* Inside a comment.  */
    lc_string,  /* Inside a string literal.  */

    /* For embedded XML in programming code, like E4X in JavaScript.  */
    lc_xml_open_tag,   /* Inside an opening tag of an XML element.  */
    lc_xml_close_tag,  /* Inside a closing tag of an XML element.  */
    lc_xml_content     /* Inside an XML text node.  */
  }
  lexical_context_ty;

/* Error message about non-ASCII character in a specific lexical context.
   LCONTEXT is the lexical context in which the character was found;
   FILE_NAME and LINE_NUMBER identify its location.
   NOTE(review): the result is a non-const 'char *', presumably freshly
   allocated and owned by the caller -- confirm against the definition.  */
extern char *non_ascii_error_message (lexical_context_ty lcontext,
                                      const char *file_name,
                                      size_t line_number);


/* Canonicalized encoding name for all input files.  */
extern const char *xgettext_global_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_global_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   Only declared when HAVE_ICONV is true.  */
extern iconv_t xgettext_global_source_iconv;
#endif

/* Canonicalized encoding name for the current input file.  */
extern const char *xgettext_current_source_encoding;

#if HAVE_ICONV
/* Converter from xgettext_current_source_encoding to UTF-8 (except from
   ASCII or UTF-8, when this conversion is a no-op).
   Only declared when HAVE_ICONV is true.  */
extern iconv_t xgettext_current_source_iconv;
#endif

/* Convert the given string from xgettext_current_source_encoding to
   the output file encoding (i.e. ASCII or UTF-8).
   The resulting string is either the argument string, or freshly allocated.
   Callers must therefore compare the result with STRING before freeing it.
   The lcontext, file_name and line_number are only used for error message
   purposes.  */
extern char *from_current_source_encoding (const char *string,
                                           lexical_context_ty lcontext,
                                           const char *file_name,
                                           size_t line_number);


/* List of messages whose msgids must not be extracted, or NULL.
   Used by remember_a_message(), which returns NULL for a message that is
   excluded.  */
extern message_list_ty *exclude;


/* Comment handling for backends which support combining adjacent strings
   even across lines.
   In these backends we cannot use the xgettext_comment* functions directly,
   because in multiline string expressions like
           "string1" +
           "string2"
   the newline between "string1" and "string2" would cause a call to
   xgettext_comment_reset(), thus destroying the accumulated comments
   that we need a little later, when we have concatenated the two strings
   and pass them to remember_a_message().
   Instead, we do the bookkeeping of the accumulated comments directly,
   and save a pointer to the accumulated comments when we read "string1".
   In order to avoid excessive copying of strings, we use reference
   counting.  */

/* A string list together with a reference count.
   References are taken with add_reference() and released with
   drop_reference().  */
typedef struct refcounted_string_list_ty refcounted_string_list_ty;
struct refcounted_string_list_ty
{
  unsigned int refcount;            /* number of references to this object */
  struct string_list_ty contents;   /* the accumulated comment lines */
};

/* Take an additional reference to RSLP.
   RSLP may be NULL, in which case nothing is counted.
   Returns RSLP itself, so that the call can be used in an assignment.  */
static inline refcounted_string_list_ty *
add_reference (refcounted_string_list_ty *rslp)
{
  /* A NULL list carries no count; hand it back unchanged.  */
  if (rslp == NULL)
    return rslp;

  rslp->refcount += 1;
  return rslp;
}

/* Release one reference to RSLP.
   RSLP may be NULL, in which case this is a no-op.
   While more than one reference exists, only the count is decremented;
   otherwise the contained string list is destroyed and RSLP is freed.  */
static inline void
drop_reference (refcounted_string_list_ty *rslp)
{
  if (rslp == NULL)
    return;

  if (rslp->refcount <= 1)
    {
      /* No other holder remains: release the contents, then the object.  */
      string_list_destroy (&rslp->contents);
      free (rslp);
      return;
    }

  rslp->refcount--;
}

/* The comments accumulated so far (see the explanation of comment handling
   for backends that combine adjacent strings).  It may be passed as the
   COMMENT argument of remember_a_message() and related functions.  */
extern refcounted_string_list_ty *savable_comment;
/* Add the comment line STR to savable_comment.  */
extern void savable_comment_add (const char *str);
/* Reset savable_comment.  NOTE(review): presumably drops the current
   reference and starts with no accumulated comments -- confirm against the
   definition.  */
extern void savable_comment_reset (void);

/* Convert character encoding of COMMENT according to the current
   source encoding.  Returns a new refcounted_string_list_ty.
   POS is a source position; NOTE(review): presumably used for error
   messages only -- confirm against the definition.  */
extern refcounted_string_list_ty *
       savable_comment_convert_encoding (refcounted_string_list_ty *comment,
                                         lex_pos_ty *pos);


/* Kinds of escape sequences in a literal string.
   The non-zero values are distinct single bits, so they can be combined
   with bitwise OR.  */
enum literalstring_escape_type
{
  LET_NONE = 0,
  LET_ANSI_C = 1 << 0,
  LET_UNICODE = 1 << 1
};

/* Parser for literal strings, given as a callback.  */
struct literalstring_parser
{
  /* Parses STRING, found at position POS, according to the escape type
     TYPE, and returns a 'char *'.
     NOTE(review): ownership of the result is not visible here (presumably
     freshly allocated) -- confirm against the implementations.  */
  char * (*parse) (const char *string, lex_pos_ty *pos,
                   enum literalstring_escape_type type);
};

/* Add a message to the list of extracted messages.
   MLP is the message list to which the message is added.
   MSGCTXT must be either NULL or a malloc()ed string; its ownership is passed
   to the callee.
   MSGID must be a malloc()ed string; its ownership is passed to the callee.
   CONTEXT holds the flags that apply to the message.
   POS->file_name must be allocated with indefinite extent.
   EXTRACTED_COMMENT is a comment that needs to be copied into the POT file,
   or NULL.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.
   Return the new or found message, or NULL if the message is excluded.  */
extern message_ty *remember_a_message (message_list_ty *mlp,
                                       char *msgctxt,
                                       char *msgid,
                                       flag_context_ty context,
                                       lex_pos_ty *pos,
                                       const char *extracted_comment,
                                       refcounted_string_list_ty *comment);

/* Add an msgid_plural to a message previously returned by
   remember_a_message.
   MP is that message.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   CONTEXT holds the flags that apply to the plural form.
   POS->file_name must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.  */
extern void remember_a_message_plural (message_ty *mp,
                                       char *string,
                                       flag_context_ty context,
                                       lex_pos_ty *pos,
                                       refcounted_string_list_ty *comment);

/* Represents the progressive parsing of an argument list w.r.t. a single
   'struct callshape'.
   The first group of fields mirrors 'struct callshape'; the argnum* fields
   are set to 0 once the corresponding argument has been seen.  The
   remaining fields hold what has been collected so far for the context,
   the msgid and the msgid_plural.  */
struct partial_call
{
  int argnumc;                  /* number of context argument, 0 when seen */
  int argnum1;                  /* number of singular argument, 0 when seen */
  int argnum2;                  /* number of plural argument, 0 when seen */
  bool argnum1_glib_context;    /* argument argnum1 has the syntax "ctxt|msgid" */
  bool argnum2_glib_context;    /* argument argnum2 has the syntax "ctxt|msgid" */
  int argtotal;                 /* total number of arguments, 0 if unspecified */
  string_list_ty xcomments;     /* auto-extracted comments */
  char *msgctxt;                /* context - owned string, or NULL */
  enum literalstring_escape_type msgctxt_escape; /* escape type of msgctxt */
  lex_pos_ty msgctxt_pos;       /* source position of msgctxt */
  char *msgid;                  /* msgid - owned string, or NULL */
  enum literalstring_escape_type msgid_escape; /* escape type of msgid */
  flag_context_ty msgid_context; /* flags for msgid */
  lex_pos_ty msgid_pos;         /* source position of msgid */
  refcounted_string_list_ty *msgid_comment; /* comment saved for msgid */
  char *msgid_plural;           /* msgid_plural - owned string, or NULL */
  enum literalstring_escape_type msgid_plural_escape; /* escape type of msgid_plural */
  flag_context_ty msgid_plural_context; /* flags for msgid_plural */
  lex_pos_ty msgid_plural_pos;  /* source position of msgid_plural */
};

/* Represents the progressive parsing of an argument list w.r.t. an entire
   'struct callshapes'.
   NOTE(review): 'alternative' is declared with one element, but given the
   'nalternatives' count it presumably holds that many elements, like
   'shapes' in 'struct callshapes' -- confirm against arglist_parser_alloc.  */
struct arglist_parser
{
  message_list_ty *mlp;         /* list where the message shall be added */
  const char *keyword;          /* the keyword, not NUL terminated */
  size_t keyword_len;           /* the keyword's length */
  size_t nalternatives;         /* number of partial_call alternatives */
  struct partial_call alternative[1]; /* partial_call alternatives */
};

/* Creates a fresh arglist_parser recognizing calls.
   MLP is the list where recognized messages shall be added.
   You can pass shapes = NULL for a parser not recognizing any calls.  */
extern struct arglist_parser * arglist_parser_alloc (message_list_ty *mlp,
                                                     const struct callshapes *shapes);
/* Clones an arglist_parser.  */
extern struct arglist_parser * arglist_parser_clone (struct arglist_parser *ap);
/* Adds a string argument to an arglist_parser.  ARGNUM must be > 0.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   FILE_NAME must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.  */
extern void arglist_parser_remember (struct arglist_parser *ap,
                                     int argnum, char *string,
                                     flag_context_ty context,
                                     char *file_name, size_t line_number,
                                     refcounted_string_list_ty *comment);
/* Adds an uninterpreted string argument to an arglist_parser.  ARGNUM
   must be > 0.
   STRING must be a malloc()ed string; its ownership is passed to the callee.
   FILE_NAME must be allocated with indefinite extent.
   COMMENT may be savable_comment, or it may be a saved copy of savable_comment
   (then add_reference must be used when saving it, and drop_reference while
   dropping it).  Clear savable_comment.
   TYPE tells which kinds of escape sequences STRING contains.  */
extern void arglist_parser_remember_literal (struct arglist_parser *ap,
                                             int argnum, char *string,
                                             flag_context_ty context,
                                             char *file_name, size_t line_number,
                                             refcounted_string_list_ty *comment,
                                             enum literalstring_escape_type type);
/* Tests whether an arglist_parser is not waiting for more arguments after
   argument ARGNUM.  */
extern bool arglist_parser_decidedp (struct arglist_parser *ap, int argnum);
/* Terminates the processing of an arglist_parser after argument ARGNUM and
   deletes it.  AP must not be used afterwards.  */
extern void arglist_parser_done (struct arglist_parser *ap, int argnum);


/* A string buffer type that allows appending bytes (in the
   xgettext_current_source_encoding) or Unicode characters.
   Returns the entire string in UTF-8 encoding.  */

struct mixed_string_buffer
{
  /* The part of the string that has already been converted to UTF-8.  */
  char *utf8_buffer;
  size_t utf8_buflen;
  size_t utf8_allocated;
  /* The first half of an UTF-16 surrogate character.  */
  unsigned short utf16_surr;
  /* The part of the string that is still in the source encoding.  */
  char *curr_buffer;
  size_t curr_buflen;
  size_t curr_allocated;
  /* The lexical context.  Used only for error message purposes.  */
  lexical_context_ty lcontext;
  const char *logical_file_name;
  int line_number;
};

/* Creates a fresh mixed_string_buffer.
   LCONTEXT, LOGICAL_FILE_NAME and LINE_NUMBER correspond to the fields of
   the same names in struct mixed_string_buffer.  */
extern struct mixed_string_buffer *
       mixed_string_buffer_alloc (lexical_context_ty lcontext,
                                  const char *logical_file_name,
                                  int line_number);

/* Appends a character to a mixed_string_buffer.
   NOTE(review): C is presumably a byte in the
   xgettext_current_source_encoding, as opposed to the Unicode variant
   below -- confirm against the definition.  */
extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp,
                                             int c);

/* Appends a Unicode character to a mixed_string_buffer.  */
extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp,
                                                int c);

/* Frees mixed_string_buffer and returns the accumulated string in UTF-8.
   BP must not be used afterwards.  */
extern char * mixed_string_buffer_done (struct mixed_string_buffer *bp);


#ifdef __cplusplus
}
#endif


#endif /* _XGETTEXT_H */