1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
|
/* Declarations having to do with GNU Emacs syntax tables.
Copyright (C) 1985, 93, 94, 97, 1998 Free Software Foundation, Inc.
This file is part of GNU Emacs.
GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Lisp object defined elsewhere and exported through this header.
   NOTE(review): presumably the symbol `syntax-table-p' -- confirm
   against the defining file.  */
extern Lisp_Object Qsyntax_table_p;
/* Worker behind the UPDATE_SYNTAX_TABLE and SETUP_SYNTAX_TABLE macro
   families in this header.  They call it as
   update_syntax_table (POSITION, COUNT, INIT, OBJECT): the UPDATE
   macros pass a COUNT of 1 or -1 with INIT == 0, the SETUP macros pass
   the caller's COUNT with INIT == 1.  */
extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
/* The standard syntax table is stored where it will automatically
be used in all new buffers: it is simply the syntax_table slot of
buffer_defaults. */
#define Vstandard_syntax_table buffer_defaults.syntax_table
/* A syntax table is a chartable whose elements are cons cells
(CODE+FLAGS . MATCHING-CHAR). MATCHING-CHAR can be nil if the char
is not a kind of parenthesis.
The low 8 bits of CODE+FLAGS hold a syntax code, which is one of the
following: */
enum syntaxcode
{
  Swhitespace    = 0,   /* whitespace character */
  Spunct         = 1,   /* random punctuation character */
  Sword          = 2,   /* word constituent */
  Ssymbol        = 3,   /* symbol constituent that is not a word constituent */
  Sopen          = 4,   /* beginning delimiter */
  Sclose         = 5,   /* ending delimiter */
  Squote         = 6,   /* prefix character, like Lisp ' */
  Sstring        = 7,   /* string-grouping character, like Lisp " */
  Smath          = 8,   /* delimiter like $ in TeX */
  Sescape        = 9,   /* character that begins a C-style escape */
  Scharquote     = 10,  /* character that quotes the following character */
  Scomment       = 11,  /* comment-starting character */
  Sendcomment    = 12,  /* comment-ending character */
  Sinherit       = 13,  /* use the standard syntax table for this character */
  Scomment_fence = 14,  /* starts/ends a comment that is delimited on the
                           other side by any char with the same syntaxcode */
  Sstring_fence  = 15,  /* starts/ends a string that is delimited on the
                           other side by any char with the same syntaxcode */
  Smax           = 16   /* upper bound on the meaningful codes (not itself
                           a code) */
};
/* Store VAL as the syntax entry for character C in syntax table TABLE.
   Characters at or above CHAR_TABLE_SINGLE_BYTE_SLOTS go through Faset;
   the rest are written directly into the char-table's contents, indexed
   by C cast to unsigned char.  C is expanded more than once.  */
#define SET_RAW_SYNTAX_ENTRY(table, c, val) \
  ((c) >= CHAR_TABLE_SINGLE_BYTE_SLOTS \
   ? Faset ((table), make_number (c), (val)) \
   : (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)))
/* Fetch the syntax entry for char C in syntax table TABLE, doing
   inheritance: while the entry is nil, retry in the table's parent.
   C is used directly as an index into the table's contents; per the
   original note, this macro is called only when C is less than
   CHAR_TABLE_ORDINARY_SLOTS.
   In the GNU C version the result is nil when no table in the parent
   chain has an entry for C, and TABLE is evaluated exactly once.  */
#ifdef __GNUC__
/* The locals carry a trailing underscore so that an argument spelled
   `tbl' or `temp' at the call site is not captured by the expansion
   (with the old names, passing a variable called `tbl' expanded to the
   self-initialisation `Lisp_Object tbl = tbl;').  */
#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  ({ Lisp_Object syntax_tbl_ = (table); \
     Lisp_Object syntax_val_ = XCHAR_TABLE (syntax_tbl_)->contents[(c)]; \
     while (NILP (syntax_val_)) \
       { \
         syntax_tbl_ = XCHAR_TABLE (syntax_tbl_)->parent; \
         if (NILP (syntax_tbl_)) \
           break; \
         syntax_val_ = XCHAR_TABLE (syntax_tbl_)->contents[(c)]; \
       } \
     syntax_val_; })
#else
/* Without statement expressions the entry is parked in the global
   syntax_temp, and the parent walk is delegated to
   syntax_parent_lookup.  TABLE is expanded twice here.  */
extern Lisp_Object syntax_temp;
extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int));
#define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \
  (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \
   (NILP (syntax_temp) \
    ? syntax_parent_lookup ((table), (c)) \
    : syntax_temp))
#endif
/* SYNTAX_ENTRY (C) yields the syntax entry for character C, doing
   inheritance.  CURRENT_SYNTAX_TABLE is the syntax table valid for the
   current position.

   Without SYNTAX_ENTRY_VIA_PROPERTY, both refer to the current buffer's
   syntax table.  With it, they consult the globally kept data in
   gl_state: the table is gl_state.current_syntax_table (either the
   buffer's syntax table or one found in text properties), and when
   gl_state.use_global is nonzero the entry is gl_state.global_code
   whatever C is.  */
#ifndef SYNTAX_ENTRY_VIA_PROPERTY
# define SYNTAX_ENTRY SYNTAX_ENTRY_INT
# define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#else
# define SYNTAX_ENTRY(c) \
  (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
# define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#endif
/* Look up the syntax entry for character C in CURRENT_SYNTAX_TABLE.
   Characters at or above CHAR_TABLE_SINGLE_BYTE_SLOTS are fetched with
   Faref; for a composite character the lookup uses the component that
   cmpchar_component (C, 0, 1) returns.  All other characters are
   looked up with SYNTAX_ENTRY_FOLLOW_PARENT, indexed by C cast to
   unsigned char.  C is expanded more than once.  */
#define SYNTAX_ENTRY_INT(c) \
  ((c) >= CHAR_TABLE_SINGLE_BYTE_SLOTS \
   ? Faref (CURRENT_SYNTAX_TABLE, \
            make_number (COMPOSITE_CHAR_P (c) \
                         ? cmpchar_component ((c), 0, 1) \
                         : (c))) \
   : SYNTAX_ENTRY_FOLLOW_PARENT (CURRENT_SYNTAX_TABLE, \
                                 (unsigned char) (c)))
/* Extract information from the entry for character C in the current
   syntax table, as fetched by SYNTAX_ENTRY:
     SYNTAX (C)            -- the syntax code: the low 8 bits of the
                              entry's car, as an enum syntaxcode;
     SYNTAX_WITH_FLAGS (C) -- the entry's whole car (code plus flags),
                              as an int;
     SYNTAX_MATCH (C)      -- the entry's cdr (the matching character).
   When the entry is not a cons these yield Swhitespace,
   (int) Swhitespace and Qnil respectively.  */
#ifdef __GNUC__
/* The statement-expression local carries a trailing underscore so that
   an argument spelled `temp' at the call site is not captured by the
   expansion.  */
#define SYNTAX(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? (enum syntaxcode) (XINT (XCAR (syntax_entry_)) & 0xff) \
      : Swhitespace); })
#define SYNTAX_WITH_FLAGS(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? XINT (XCAR (syntax_entry_)) \
      : (int) Swhitespace); })
#define SYNTAX_MATCH(c) \
  ({ Lisp_Object syntax_entry_; \
     syntax_entry_ = SYNTAX_ENTRY (c); \
     (CONSP (syntax_entry_) \
      ? XCDR (syntax_entry_) \
      : Qnil); })
#else
/* Without statement expressions the entry is stored in the global
   syntax_temp before being examined.  */
#define SYNTAX(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff) \
    : Swhitespace))
#define SYNTAX_WITH_FLAGS(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XINT (XCAR (syntax_temp)) \
    : (int) Swhitespace))
#define SYNTAX_MATCH(c) \
  (syntax_temp = SYNTAX_ENTRY ((c)), \
   (CONSP (syntax_temp) \
    ? XCDR (syntax_temp) \
    : Qnil))
#endif
/* Then there are seven single-bit flags that have the following meanings:
1. This character is the first of a two-character comment-start sequence.
2. This character is the second of a two-character comment-start sequence.
3. This character is the first of a two-character comment-end sequence.
4. This character is the second of a two-character comment-end sequence.
5. This character is a prefix, for backward-prefix-chars.
6. see below
7. This character is part of a nestable comment sequence.
Flag N is stored in bit 15 + N of CODE+FLAGS, i.e. in bits 16 through 22.
Note that any two-character sequence whose first character has flag 1
and whose second character has flag 2 will be interpreted as a comment start.
Flag 6 is used to discriminate between two different comment styles.
Languages such as C++ allow two orthogonal syntax start/end pairs,
and flag 6 is used to determine whether a comment-end sequence or an
Sendcomment character ends style a or b. Comment start sequences can
start style a or b. Style a is always the default.
*/
/* These macros test a particular flag for a given character C.  Each
   one fetches the code-plus-flags word with SYNTAX_WITH_FLAGS and hands
   it to the SYNTAX_FLAGS_* extractor of the same name, so the result is
   1 when the flag is set and 0 otherwise.  */
#define SYNTAX_COMSTART_FIRST(c) \
  SYNTAX_FLAGS_COMSTART_FIRST (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_COMSTART_SECOND(c) \
  SYNTAX_FLAGS_COMSTART_SECOND (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_COMEND_FIRST(c) \
  SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_COMEND_SECOND(c) \
  SYNTAX_FLAGS_COMEND_SECOND (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_PREFIX(c) \
  SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_COMMENT_STYLE(c) \
  SYNTAX_FLAGS_COMMENT_STYLE (SYNTAX_WITH_FLAGS (c))
#define SYNTAX_COMMENT_NESTED(c) \
  SYNTAX_FLAGS_COMMENT_NESTED (SYNTAX_WITH_FLAGS (c))
/* These macros extract specific flags from an integer FLAGS that holds
   the syntax code and the flags.  Each yields 1 when the flag's bit is
   set and 0 otherwise; the flags occupy bits 16 through 22.  */
#define SYNTAX_FLAGS_BIT(flags, bit) (((flags) >> (bit)) & 1)
#define SYNTAX_FLAGS_COMSTART_FIRST(flags) SYNTAX_FLAGS_BIT ((flags), 16)
#define SYNTAX_FLAGS_COMSTART_SECOND(flags) SYNTAX_FLAGS_BIT ((flags), 17)
#define SYNTAX_FLAGS_COMEND_FIRST(flags) SYNTAX_FLAGS_BIT ((flags), 18)
#define SYNTAX_FLAGS_COMEND_SECOND(flags) SYNTAX_FLAGS_BIT ((flags), 19)
#define SYNTAX_FLAGS_PREFIX(flags) SYNTAX_FLAGS_BIT ((flags), 20)
#define SYNTAX_FLAGS_COMMENT_STYLE(flags) SYNTAX_FLAGS_BIT ((flags), 21)
#define SYNTAX_FLAGS_COMMENT_NESTED(flags) SYNTAX_FLAGS_BIT ((flags), 22)
/* Indexed by a character, this array gives the syntax code which that
   character signifies, stored as a char.  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.  */
extern unsigned char syntax_spec_code[256];
/* The reverse mapping: indexed by syntax code, this gives the letter
   that describes it.  */
extern char syntax_code_spec[16];
/* Convert the byte offset BYTEPOS into a character position, for the
   object recorded in gl_state by SETUP_SYNTAX_TABLE_FOR_OBJECT.  The
   value is meant to be handed to the UPDATE_SYNTAX_TABLE... macros.
   Those macros do nothing when parse_sexp_lookup_properties is 0, so in
   that case this simply yields 0, for speed.  Otherwise:
     - for a string, string_byte_to_char does the conversion;
     - for a buffer, or for nil (the current buffer), BYTEPOS is taken
       relative to the start of the accessible region and so is the
       result;
     - for anything else BYTEPOS is returned unchanged.  */
#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos) \
  (! parse_sexp_lookup_properties \
   ? 0 \
   : (STRINGP (gl_state.object) \
      ? string_byte_to_char (gl_state.object, (bytepos)) \
      : (BUFFERP (gl_state.object) \
         ? (buf_bytepos_to_charpos (XBUFFER (gl_state.object), \
                                    (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1) \
            - BUF_BEGV (XBUFFER (gl_state.object)) + 1) \
         : (NILP (gl_state.object) \
            ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1 \
            : (bytepos)))))
/* Make the syntax table state (gl_state) good for CHARPOS, assuming it
   is currently good for a position before CHARPOS.
   Nothing happens, and the value is 0, when
   parse_sexp_lookup_properties is zero or CHARPOS is still below
   gl_state.e_property.  Otherwise update_syntax_table is called for
   CHARPOS + gl_state.offset with a count of 1, and the value is 1.  */
#define UPDATE_SYNTAX_TABLE_FORWARD(charpos) \
  ((! parse_sexp_lookup_properties \
    || (charpos) < gl_state.e_property) \
   ? 0 \
   : (update_syntax_table ((charpos) + gl_state.offset, 1, 0, \
                           gl_state.object), \
      1))
/* Make the syntax table state (gl_state) good for CHARPOS, assuming it
   is currently good for a position after CHARPOS.
   Nothing happens, and the value is 0, when
   parse_sexp_lookup_properties is zero or CHARPOS is still above
   gl_state.b_property.  Otherwise update_syntax_table is called for
   CHARPOS + gl_state.offset with a count of -1, and the value is 1.  */
#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos) \
  ((! parse_sexp_lookup_properties \
    || (charpos) > gl_state.b_property) \
   ? 0 \
   : (update_syntax_table ((charpos) + gl_state.offset, -1, 0, \
                           gl_state.object), \
      1))
/* Make the syntax table state good for CHARPOS, whichever side of the
   currently valid range it lies on.  The backward update is tried
   first; only if it did nothing is the forward update tried.  The
   value is 1 if update_syntax_table was called and 0 otherwise.  */
#define UPDATE_SYNTAX_TABLE(charpos) \
  (UPDATE_SYNTAX_TABLE_BACKWARD ((charpos)) \
   || UPDATE_SYNTAX_TABLE_FORWARD ((charpos)))
/* Initialise gl_state for a scan of the current buffer.  Call this with
   FROM at the start of a forward search, or just after the last
   position of a backward search; the sign of COUNT gives the direction
   of the search.  The table for the first character is picked up here,
   so the caller does not need to invoke UPDATE_SYNTAX_TABLE
   immediately afterwards.

   update_syntax_table is called only when parse_sexp_lookup_properties
   is nonzero and either the search runs forward or FROM is beyond
   BEGV; it is given FROM when going forward and FROM - 1 when going
   backward.

   The expansion ends in a dangling `else', so the invocation must be
   followed by a semicolon.  */
#define SETUP_SYNTAX_TABLE(FROM, COUNT) \
if (1) \
  { \
    gl_state.b_property = BEGV - 1; \
    gl_state.e_property = ZV + 1; \
    gl_state.object = Qnil; \
    gl_state.use_global = 0; \
    gl_state.offset = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties \
        && ((COUNT) > 0 || (FROM) > BEGV)) \
      update_syntax_table ((FROM) - ((COUNT) > 0 ? 0 : 1), (COUNT), \
                           1, Qnil); \
  } \
else
/* Like SETUP_SYNTAX_TABLE, but for OBJECT.  If OBJECT is nil, use the
   current buffer.  If it is t, ignore properties altogether.  Any
   OBJECT that is not a buffer, nil or t is treated as a string.

   This is meant for regex.c to use.  For buffers, regex.c passes
   arguments to the UPDATE_SYNTAX_TABLE macros which are relative to
   BEGV, so for a buffer (or nil) the offset field is set to BEGV - 1,
   and b_property / e_property are expressed in the same relative
   terms.  For t and for strings the offset is 0.

   FROM is relative in the same way; when parse_sexp_lookup_properties
   is nonzero, update_syntax_table is called for FROM + offset when
   COUNT is positive and for one position earlier otherwise.

   COUNT is parenthesized at every use, so an argument such as
   `forward ? 1 : -1' expands correctly.

   The expansion ends in a dangling `else', so the invocation must be
   followed by a semicolon.  */
#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT) \
if (1) \
  { \
    gl_state.object = (OBJECT); \
    if (BUFFERP (gl_state.object)) \
      { \
        struct buffer *buf = XBUFFER (gl_state.object); \
        gl_state.b_property = 0; \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1; \
        gl_state.offset = BUF_BEGV (buf) - 1; \
      } \
    else if (NILP (gl_state.object)) \
      { \
        gl_state.b_property = 0; \
        gl_state.e_property = ZV - BEGV + 1; \
        gl_state.offset = BEGV - 1; \
      } \
    else if (EQ (gl_state.object, Qt)) \
      { \
        gl_state.b_property = - 1; \
        gl_state.e_property = 1500000000; \
        gl_state.offset = 0; \
      } \
    else \
      { \
        gl_state.b_property = -1; \
        gl_state.e_property = 1 + XSTRING (gl_state.object)->size; \
        gl_state.offset = 0; \
      } \
    gl_state.use_global = 0; \
    gl_state.current_syntax_table = current_buffer->syntax_table; \
    if (parse_sexp_lookup_properties) \
      update_syntax_table (((FROM) + gl_state.offset \
                            + ((COUNT) > 0 ? 0 : -1)), \
                           (COUNT), 1, gl_state.object); \
  } \
else
/* Scanning state shared by the syntax macros in this file (the single
   instance is gl_state).  In the comments below, "c_s_t" abbreviates
   the current_syntax_table field.  */
struct gl_state_s
{
Lisp_Object object; /* The object we are scanning.  The setup
macros store nil, t, a buffer, or
(otherwise) a string here. */
int start; /* NOTE(review): the original comment here read
"Where to stop", the same as for `stop';
presumably this is where scanning starts --
confirm against the users of this field. */
int stop; /* Where to stop. */
int use_global; /* Whether to use global_code
or c_s_t.  When SYNTAX_ENTRY_VIA_PROPERTY is
defined, nonzero makes SYNTAX_ENTRY yield
global_code. */
Lisp_Object global_code; /* Syntax code of current char. */
Lisp_Object current_syntax_table; /* Syntax table for current pos. */
Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
int b_property; /* Last index where c_s_t is
not valid.  UPDATE_SYNTAX_TABLE_BACKWARD
refreshes the state for positions at or
below it. */
int e_property; /* First index where c_s_t is
not valid.  UPDATE_SYNTAX_TABLE_FORWARD
refreshes the state for positions at or
above it. */
INTERVAL forward_i; /* Where to start lookup on forward */
INTERVAL backward_i; /* or backward movement. The
data in c_s_t is valid
between these intervals,
and possibly at the
intervals too, depending
on left_ok and right_ok
(declared at the end of
this struct). */
/* Offset for positions specified to UPDATE_SYNTAX_TABLE: the macros
   add it to CHARPOS before calling update_syntax_table. */
int offset;
/* The two flags the note on backward_i refers to.
   NOTE(review): presumably they say whether c_s_t is also valid at
   backward_i and forward_i themselves -- confirm which is which. */
char left_ok;
char right_ok;
};
/* The scanning state read and written by the syntax macros in this
   header.  */
extern struct gl_state_s gl_state;
/* When this is zero the UPDATE_SYNTAX_TABLE macros do nothing, the
   SETUP_SYNTAX_TABLE macros skip their call to update_syntax_table,
   and SYNTAX_TABLE_BYTE_TO_CHAR yields 0.  */
extern int parse_sexp_lookup_properties;
/* Declared here for other files; neither function is used by the
   macros in this header.  */
extern INTERVAL interval_of P_ ((int, Lisp_Object));
extern int scan_words P_ ((int, int));
|