src/coding.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787

/* Header for coding system handler.
   Copyright (C) 2001-2023 Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */

#ifndef EMACS_CODING_H
#define EMACS_CODING_H

#include "lisp.h"

INLINE_HEADER_BEGIN

/* Index to arguments of Fdefine_coding_system_internal.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };

enum define_coding_iso2022_arg_index
  {
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
  };

enum define_coding_utf8_arg_index
  {
    coding_arg_utf8_bom = coding_arg_max,
    coding_arg_utf8_max
  };

enum define_coding_utf16_arg_index
  {
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
  };

enum define_coding_ccl_arg_index
  {
    coding_arg_ccl_decoder = coding_arg_max,
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };

enum define_coding_undecided_arg_index
  {
    coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
    coding_arg_undecided_inhibit_iso_escape_detection,
    coding_arg_undecided_prefer_utf_8,
    coding_arg_undecided_max
  };

/* Hash table for all coding systems.  Keys are coding system symbols
   and values are spec vectors of the corresponding coding system.  A
   spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
   vector of attribute of the coding system.  ALIASES is a list of
   aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
   `mac' or a vector of coding systems (symbols).  */

extern Lisp_Object Vcoding_system_hash_table;

/* Enumeration of index to an attribute vector of a coding system.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The followings are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_undecided_inhibit_null_byte_detection,
    coding_attr_undecided_inhibit_iso_escape_detection,
    coding_attr_undecided_prefer_utf_8,

    coding_attr_last_index
  };


/* Macros to access an element of an attribute vector.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)


/* Return the name of a coding system specified by ID.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID.  */

#define CODING_ID_ATTRS(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID.  */

#define CODING_ID_ALIASES(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID.  */

#define CODING_ID_EOL_TYPE(id)	\
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))


/* Return the spec vector of CODING_SYSTEM_SYMBOL.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol)	\
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL.  */

#define CODING_SYSTEM_ID(coding_system_symbol)			\
  hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),		\
	       coding_system_symbol, NULL)

/* Return true if CODING_SYSTEM_SYMBOL is a coding system.  */

#define CODING_SYSTEM_P(coding_system_symbol)		\
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0		\
   || (! NILP (coding_system_symbol)			\
       && ! NILP (Fcoding_system_p (coding_system_symbol))))

/* Check if X is a coding system or not.  */

#define CHECK_CODING_SYSTEM(x)				\
  do {							\
    if (CODING_SYSTEM_ID (x) < 0			\
	&& NILP (Fcheck_coding_system (x)))		\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)


/* Check if X is a coding system or not.  If it is, set SEPC to the
   spec vector of the coding system.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)		\
  do {							\
    spec = CODING_SYSTEM_SPEC (x);			\
    if (NILP (spec))					\
      {							\
	Fcheck_coding_system (x);			\
	spec = CODING_SYSTEM_SPEC (x);			\
      }							\
    if (NILP (spec))					\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)


/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id)			\
  do								\
    {								\
      id = CODING_SYSTEM_ID (x);				\
      if (id < 0)						\
	{							\
	  Fcheck_coding_system (x);				\
	  id = CODING_SYSTEM_ID (x);				\
	}							\
      if (id < 0)						\
	wrong_type_argument (Qcoding_system_p, (x));	\
    } while (false)


/*** GENERAL section ***/

/* Enumeration of result code of code conversion.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT
  };


/* Macros used for the member `mode' of the struct coding_system.  */

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x01

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x02

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x04

#define CODING_MODE_FIXED_DESTINATION		0x08

/* If set, it means that the encoding routines produces some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x10

  /* For handling composition sequence.  */
#include "composite.h"

enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };

/* Structure for the current composition status.  */
struct composition_status
{
  enum composition_state state;
  enum composition_method method;
  bool old_form;	  /* true if pre-21 form */
  int length;		  /* number of elements produced in charbuf */
  int nchars;		  /* number of characters composed */
  int ncomps;		  /* number of composition components */
  /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
     See the comment in coding.c.  */
  int carryover[4 		/* annotation header */
		+ MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
		+ 2				     /* intermediate -1 -1 */
		+ MAX_COMPOSITION_COMPONENTS	     /* CHARs */
		];
};


/* Structure of the field `spec.iso_2022' in the structure
   `coding_system'.  */
struct iso_2022_spec
{
  /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
  unsigned flags;

  /* The current graphic register invoked to each graphic plane.  */
  int current_invocation[2];

  /* The current charset designated to each graphic register.  The
     value -1 means that not charset is designated, -2 means that
     there was an invalid designation previously.  */
  int current_designation[4];

  /* If positive, we are now scanning CTEXT extended segment.  */
  int ctext_extended_segment_len;

  /* True temporarily only when graphic register 2 or 3 is invoked by
     single-shift while encoding.  */
  bool_bf single_shifting : 1;

  /* True temporarily only when processing at beginning of line.  */
  bool_bf bol : 1;

  /* If true, we are now scanning embedded UTF-8 sequence.  */
  bool_bf embedded_utf_8 : 1;

  /* The current composition.  */
  struct composition_status cmp_status;
};

struct emacs_mule_spec
{
  struct composition_status cmp_status;
};

struct undecided_spec
{
  /* Inhibit null byte detection.  1 means always inhibit,
     -1 means do not inhibit, 0 means rely on user variable.  */
  int inhibit_nbd;

  /* Inhibit ISO escape detection.  -1, 0, 1 as above.  */
  int inhibit_ied;

  /* Prefer UTF-8 when the input could be other encodings.  */
  bool prefer_utf_8;
};

enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };

enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };

struct utf_16_spec
{
  enum utf_bom_type bom;
  enum utf_16_endian_type endian;
  int surrogate;
};

struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};


struct coding_system
{
  /* ID number of the coding system.  This is an index to
     Vcoding_system_hash_table.  This value is set by
     setup_coding_system.  At the early stage of building time, this
     value is -1 in the array coding_categories to indicate that no
     coding-system of that category is yet defined.  */
  ptrdiff_t id;

  /* Flag bits of the coding system.  The meaning of each bit is common
     to all types of coding systems.  */
  unsigned common_flags : 14;

  /* Mode bits of the coding system.  See the comments of the macros
     CODING_MODE_XXX.  */
  unsigned mode : 5;

  /* The following two members specify how binary 8-bit code 128..255
     are represented in source and destination text respectively.  True
     means they are represented by 2-byte sequence, false means they are
     represented by 1-byte as is (see the comment in character.h).  */
  bool_bf src_multibyte : 1;
  bool_bf dst_multibyte : 1;

  /* True if the source of conversion is not in the member
     `charbuf', but at `src_object'.  */
  bool_bf chars_at_source : 1;

  /* Nonzero if the result of conversion is in `destination'
     buffer rather than in `dst_object'.  */
  bool_bf raw_destination : 1;

  /* Set to true if charbuf contains an annotation.  */
  bool_bf annotated : 1;

  /* Used internally in coding.c.  See the comment of detect_ascii.  */
  unsigned eol_seen : 3;

  /* Finish status of code conversion.  */
  ENUM_BF (coding_result_code) result : 3;

  int max_charset_id;

  /* Detailed information specific to each type of coding system.  */
  union
    {
      struct iso_2022_spec iso_2022;
      struct ccl_spec *ccl;	/* Defined in ccl.h.  */
      struct utf_16_spec utf_16;
      enum utf_bom_type utf_8_bom;
      struct emacs_mule_spec emacs_mule;
      struct undecided_spec undecided;
    } spec;

  unsigned char *safe_charsets;

  /* How many heading bytes we can skip for decoding.  This is set to
     -1 in setup_coding_system, and updated by detect_coding.  So,
     when this is equal to the byte length of the text being
     converted, we can skip the actual conversion process except for
     the eol format.  */
  ptrdiff_t head_ascii;

  /* How many bytes/chars at the source are detected as valid utf-8
     sequence.  Set by detect_coding_utf_8.  */
  ptrdiff_t detected_utf8_bytes, detected_utf8_chars;

  /* The following members are set by encoding/decoding routine.  */
  ptrdiff_t produced, produced_char, consumed, consumed_char;

  ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
  Lisp_Object src_object;
  const unsigned char *source;

  ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
  Lisp_Object dst_object;
  unsigned char *destination;

  /* If an element is non-negative, it is a character code.

     If it is in the range -128..-1, it is a 8-bit character code
     minus 256.

     If it is less than -128, it specifies the start of an annotation
     chunk.  The length of the chunk is -128 minus the value of the
     element.  The following elements are OFFSET, ANNOTATION-TYPE, and
     a sequence of actual data for the annotation.  OFFSET is a
     character position offset from dst_pos or src_pos,
     ANNOTATION-TYPE specifies the meaning of the annotation and how to
     handle the following data..  */
  int *charbuf;
  int charbuf_size, charbuf_used;

  unsigned char carryover[64];
  int carryover_bytes;

  int default_char;

  bool (*detector) (struct coding_system *, struct coding_detection_info *);
  void (*decoder) (struct coding_system *);
  bool (*encoder) (struct coding_system *);
};

/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kind of
   annotations (currently two of them are used).  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code to be
   attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding.  */
#define CODING_REQUIRE_DECODING(coding)	\
  ((coding)->dst_multibyte		\
   || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)


/* Return nonzero if the coding context CODING requires code conversion on
   encoding.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)				\
  ((coding)->src_multibyte					\
   || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK	\
   || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)


/* Return nonzero if the coding context CODING requires some kind of code
   detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))

/* Macros to decode or encode a character of JISX0208 in SJIS.  S1 and
   S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
   system.  C1 and C2 are the 1st and 2nd position codes of Emacs'
   internal format.  */

#define SJIS_TO_JIS(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    s1 = (code) >> 8, s2 = (code) & 0xFF;		\
							\
    if (s2 >= 0x9F)					\
      (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0),	\
       j2 = s2 - 0x7E);					\
    else						\
      (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1),	\
       j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F));		\
    (code) = (j1 << 8) | j2;				\
  } while (false)

#define SJIS_TO_JIS2(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    s1 = (code) >> 8, s2 = (code) & 0xFF;		\
							\
    if (s2 >= 0x9F)					\
      {							\
	j1 = (s1 == 0xF0 ? 0x28				\
	      : s1 == 0xF1 ? 0x24			\
	      : s1 == 0xF2 ? 0x2C			\
	      : s1 == 0xF3 ? 0x2E			\
	      : 0x6E + (s1 - 0xF4) * 2);		\
	j2 = s2 - 0x7E;					\
      }							\
    else						\
      {							\
	j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2	\
	      : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2	\
	      : 0x6F + (s1 - 0xF5) * 2);		\
	j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F));		\
      }							\
    (code) = (j1 << 8) | j2;				\
  } while (false)


#define JIS_TO_SJIS(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    j1 = (code) >> 8, j2 = (code) & 0xFF;		\
    if (j1 & 1)						\
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1),	\
       s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F));		\
    else						\
      (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0),	\
       s2 = j2 + 0x7E);					\
    (code) = (s1 << 8) | s2;				\
  } while (false)

#define JIS_TO_SJIS2(code)				\
  do {							\
    int s1, s2, j1, j2;					\
							\
    j1 = (code) >> 8, j2 = (code) & 0xFF;		\
    if (j1 & 1)						\
      {							\
	s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2	\
	      : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2	\
	      : 0xF5 + (j1 - 0x6F) / 2);		\
	s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F);		\
      }							\
    else						\
      {							\
	s1 = (j1 == 0x28 ? 0xF0				\
	      : j1 == 0x24 ? 0xF1			\
	      : j1 == 0x2C ? 0xF2			\
	      : j1 == 0x2E ? 0xF3			\
	      : 0xF4 + (j1 - 0x6E) / 2);		\
	s2 = j2 + 0x7E;					\
      }							\
    (code) = (s1 << 8) | s2;				\
  } while (false)

/* Encode the file name NAME using the specified coding system
   for file names, if any.  May return NAME itself.  */
#define ENCODE_FILE(NAME)  encode_file_name (NAME)

/* Decode the file name NAME using the specified coding system
   for file names, if any.  May return NAME itself.  */
#define DECODE_FILE(NAME)  decode_file_name (NAME)

/* Encode the string STR using the specified coding system
   for system functions, if any.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, true)	   \
   : str)

/* Decode the string STR using the specified coding system
   for system functions, if any.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, false)	   \
   : str)

/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)

/* Return true if VAL is a high surrogate.  VAL must be a 16-bit code
   unit.  */

#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Return true if VAL is a low surrogate.  VAL must be a 16-bit code
   unit.  */

#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Extern declarations.  */
extern Lisp_Object code_conversion_save (bool, bool);
extern bool encode_coding_utf_8 (struct coding_system *);
extern bool utf8_string_p (Lisp_Object);
extern void setup_coding_system (Lisp_Object, struct coding_system *);
extern Lisp_Object coding_charset_list (struct coding_system *);
extern Lisp_Object coding_system_charset_list (Lisp_Object);
extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
                                        Lisp_Object, bool, bool, bool);
extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
                                                 bool);
extern Lisp_Object encode_string_utf_8 (Lisp_Object, Lisp_Object, bool,
					Lisp_Object, Lisp_Object);
extern Lisp_Object decode_string_utf_8 (Lisp_Object, const char *, ptrdiff_t,
					Lisp_Object, bool,
					Lisp_Object, Lisp_Object);
extern Lisp_Object encode_file_name (Lisp_Object);
extern Lisp_Object decode_file_name (Lisp_Object);
extern Lisp_Object raw_text_coding_system (Lisp_Object);
extern bool raw_text_coding_system_p (struct coding_system *);
extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
extern Lisp_Object complement_process_encoding_system (Lisp_Object);
extern Lisp_Object make_string_from_utf8 (const char *, ptrdiff_t);

extern void decode_coding_gap (struct coding_system *, ptrdiff_t);
extern void decode_coding_object (struct coding_system *,
                                  Lisp_Object, ptrdiff_t, ptrdiff_t,
                                  ptrdiff_t, ptrdiff_t, Lisp_Object);
extern void encode_coding_object (struct coding_system *,
                                  Lisp_Object, ptrdiff_t, ptrdiff_t,
                                  ptrdiff_t, ptrdiff_t, Lisp_Object);
/* Defined in this file.  */
INLINE int surrogates_to_codepoint (int, int);

#if defined (WINDOWSNT) || defined (CYGWIN)

/* These functions use Lisp string objects to store the UTF-16LE
   strings that modern versions of Windows expect.  These strings are
   not particularly useful to Lisp, and all Lisp strings should be
   native Emacs multibyte.  */

/* Access the wide-character string stored in a Lisp string object.  */
#define WCSDATA(x) ((wchar_t *) SDATA (x))

/* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
   string, and store it in *BUF.  BUF may safely point to STR on entry.  */
extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);

/* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
   object, to a multi-byte Emacs string and return it.  This function
   calls code_convert_string_norecord internally and has all its
   failure modes.  STR itself is not modified.  */
extern Lisp_Object from_unicode (Lisp_Object str);

/* Convert WSTR to an Emacs string.  */
extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);

#endif /* WINDOWSNT || CYGWIN */

/* Macros for backward compatibility.  */

#define encode_coding_string(coding, string, nocopy)			\
  (STRING_MULTIBYTE(string) ?						\
    (encode_coding_object (coding, string, 0, 0, SCHARS (string),	\
			   SBYTES (string), Qt),			\
     (coding)->dst_object) : (string))


#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (false)


/* Return the Unicode code point for the given UTF-16 surrogates.  */

INLINE int
surrogates_to_codepoint (int low, int high)
{
  eassert (0 <= low && low <= 0xFFFF);
  eassert (0 <= high && high <= 0xFFFF);
  eassert (UTF_16_LOW_SURROGATE_P (low));
  eassert (UTF_16_HIGH_SURROGATE_P (high));
  return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
}

/* Like build_string, but always returns a multibyte string, and is
   optimized for speed when STR is a UTF-8 encoded text string.  */

INLINE Lisp_Object
build_string_from_utf8 (const char *str)
{
  return make_string_from_utf8 (str, strlen (str));
}


extern Lisp_Object preferred_coding_system (void);

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

extern char emacs_mule_bytes[256];

INLINE_HEADER_END

#endif /* EMACS_CODING_H */