summaryrefslogtreecommitdiff
path: root/src/coding.c
diff options
context:
space:
mode:
authorKenichi Handa <handa@m17n.org>1997-02-27 11:10:42 +0000
committerKenichi Handa <handa@m17n.org>1997-02-27 11:10:42 +0000
commit0a54824f19a934a8622c42d6dddf241af089c569 (patch)
tree19dd9834ec35514430aefa4a1d354f862faacbf1 /src/coding.c
parentba068c42cd3e1dbaa40f5ca4a32d3650edea2ed3 (diff)
downloademacs-0a54824f19a934a8622c42d6dddf241af089c569.tar.gz
(create_process, Fopen_network_stream): Typo in indexes
of array proc_encode_coding_system fixed. Remove prefix "coding-system-" from coding system symbol names. (encode_coding): Fix typo ("=" -> "=="). (detect_coding_iso2022): Detect coding-category-iso-8-2 more precisely. (ENCODE_RESET_PLANE_AND_REGISTER): Argument `eol' is deleted. Don't call ENCODE_DESIGNATION if nothing is designated initially. (encode_designation_at_bol): New function. (encode_coding_iso2022): Handle CODING_FLAG_ISO_INIT_AT_BOL and CODING_FLAG_ISO_DESIGNATE_AT_BOL. (setup_coding_system): Now, flags of ISO2022 coding systems contain charsets instead of charset IDs. (detect_coding_iso2022, decode_coding_iso2022): Make the code robust against invalid SI and SO. (Ffind_coding_system, syms_of_coding): Escape newlines in docstring. (setup_coding_system): Correct setting of coding->symbol and coding->eol_type. The performance is improved.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c421
1 files changed, 254 insertions, 167 deletions
diff --git a/src/coding.c b/src/coding.c
index 7d93362e0a3..929e7e666bb 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -581,45 +581,43 @@ int
detect_coding_iso2022 (src, src_end)
unsigned char *src, *src_end;
{
- unsigned char graphic_register[4];
- unsigned char c, esc_cntl;
+ unsigned char c, g1 = 0;
int mask = (CODING_CATEGORY_MASK_ISO_7
| CODING_CATEGORY_MASK_ISO_8_1
| CODING_CATEGORY_MASK_ISO_8_2);
- /* We may look ahead maximum 3 bytes. */
- unsigned char *adjusted_src_end = src_end - 3;
+ /* We may look ahead at most 4 bytes. */
+ unsigned char *adjusted_src_end = src_end - 4;
int i;
- for (i = 0; i < 4; i++)
- graphic_register[i] = CHARSET_ASCII;
-
- while (src < adjusted_src_end)
+ while (src < src_end)
{
c = *src++;
switch (c)
{
case ISO_CODE_ESC:
- if (src >= adjusted_src_end)
+ if (src >= src_end)
break;
c = *src++;
- if (c == '$')
+ if (src + 2 >= src_end
+ && ((c >= '(' && c <= '/')
+ || c == '$' && ((*src >= '(' && *src <= '/')
+ || (*src >= '@' && *src <= 'B'))))
{
- /* Designation of 2-byte character set. */
- if (src >= adjusted_src_end)
- break;
- c = *src++;
+ /* Valid designation sequence. */
+ if (c == ')' || (c == '$' && *src == ')'))
+ g1 = 1;
+ src++;
+ break;
}
- if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
- /* Designation to graphic register 1, 2, or 3. */
- mask &= ~CODING_CATEGORY_MASK_ISO_7;
else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
return CODING_CATEGORY_MASK_ISO_ELSE;
break;
- case ISO_CODE_SI:
case ISO_CODE_SO:
- return CODING_CATEGORY_MASK_ISO_ELSE;
-
+ if (g1)
+ return CODING_CATEGORY_MASK_ISO_ELSE;
+ break;
+
case ISO_CODE_CSI:
case ISO_CODE_SS2:
case ISO_CODE_SS3:
@@ -636,9 +634,9 @@ detect_coding_iso2022 (src, src_end)
int count = 1;
mask &= ~CODING_CATEGORY_MASK_ISO_7;
- while (src < adjusted_src_end && *src >= 0xA0)
+ while (src < src_end && *src >= 0xA0)
count++, src++;
- if (count & 1 && src < adjusted_src_end)
+ if (count & 1 && src < src_end)
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
}
break;
@@ -794,6 +792,8 @@ decode_coding_iso2022 (coding, source, destination,
break;
case ISO_shift_out:
+ if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
+ goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
@@ -830,14 +830,10 @@ decode_coding_iso2022 (coding, source, destination,
case '&': /* revision of following character set */
ONE_MORE_BYTE (c1);
if (!(c1 >= '@' && c1 <= '~'))
- {
- goto label_invalid_escape_sequence;
- }
+ goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
if (c1 != ISO_CODE_ESC)
- {
- goto label_invalid_escape_sequence;
- }
+ goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
goto label_escape_sequence;
@@ -859,26 +855,34 @@ decode_coding_iso2022 (coding, source, destination,
DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
}
else
- {
- goto label_invalid_escape_sequence;
- }
+ goto label_invalid_escape_sequence;
break;
case 'n': /* invocation of locking-shift-2 */
+ if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+ goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
+ charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
case 'o': /* invocation of locking-shift-3 */
+ if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+ goto label_invalid_escape_sequence;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
+ charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
break;
case 'N': /* invocation of single-shift-2 */
+ if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
+ goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
DECODE_ISO_CHARACTER (charset, c1);
break;
case 'O': /* invocation of single-shift-3 */
+ if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
+ goto label_invalid_escape_sequence;
ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
DECODE_ISO_CHARACTER (charset, c1);
@@ -1246,24 +1250,63 @@ encode_invocation_designation (charset, coding, dst)
/* Produce codes for designation and invocation to reset the graphic
planes and registers to initial state. */
-#define ENCODE_RESET_PLANE_AND_REGISTER(eol) \
- do { \
- int reg; \
- if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
- ENCODE_SHIFT_IN; \
- for (reg = 0; reg < 4; reg++) \
- { \
- if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \
- { \
- if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \
- } \
- else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
- != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \
- ENCODE_DESIGNATION \
- (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
- } \
+#define ENCODE_RESET_PLANE_AND_REGISTER \
+ do { \
+ int reg; \
+ if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
+ ENCODE_SHIFT_IN; \
+ for (reg = 0; reg < 4; reg++) \
+ if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
+ && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
+ != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
+ ENCODE_DESIGNATION \
+ (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
} while (0)
+int
+encode_designation_at_bol (coding, src, src_end, dstp)
+ struct coding_system *coding;
+ unsigned char *src, *src_end, **dstp;
+{
+ int charset, reg, r[4];
+ unsigned char *dst = *dstp, c;
+ for (reg = 0; reg < 4; reg++) r[reg] = -1;
+ while (src < src_end && (c = *src++) != '\n')
+ {
+ switch (emacs_code_class[c])
+ {
+ case EMACS_ascii_code:
+ charset = CHARSET_ASCII;
+ break;
+ case EMACS_leading_code_2:
+ if (++src >= src_end) continue;
+ charset = c;
+ break;
+ case EMACS_leading_code_3:
+ if ((src += 2) >= src_end) continue;
+ charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
+ break;
+ case EMACS_leading_code_4:
+ if ((src += 3) >= src_end) continue;
+ charset = *(src - 3);
+ break;
+ default:
+ continue;
+ }
+ reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
+ if (r[reg] < 0
+ && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
+ r[reg] = charset;
+ }
+ if (c != '\n' && !coding->last_block)
+ return -1;
+ for (reg = 0; reg < 4; reg++)
+ if (r[reg] >= 0)
+ ENCODE_DESIGNATION (r[reg], reg, coding);
+ *dstp = dst;
+ return 0;
+}
+
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
int
@@ -1278,10 +1321,10 @@ encode_coding_iso2022 (coding, source, destination,
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 6, we subtract 5
+ /* Since the maximum bytes produced by each loop is 20, we subtract 19
from DST_END to assure overflow checking is necessary only at the
head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 5;
+ unsigned char *adjusted_dst_end = dst_end - 19;
while (src < src_end && dst < adjusted_dst_end)
{
@@ -1291,9 +1334,22 @@ encode_coding_iso2022 (coding, source, destination,
TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
reset to SRC_BASE before exiting. */
unsigned char *src_base = src;
- unsigned char c1 = *src++, c2, c3, c4;
+ unsigned char c1, c2, c3, c4;
int charset;
+ if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
+ && CODING_SPEC_ISO_BOL (coding))
+ {
+ /* We have to produce designation sequences now. */
+ if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
+ /* We can't find end of line in the current block. Let's
+ repeat encoding starting from the current position
+ pointed by SRC. */
+ break;
+ CODING_SPEC_ISO_BOL (coding) = 0;
+ }
+
+ c1 = *src++;
/* If we are seeing a component of a composite character, we are
seeing a leading-code specially encoded for composition, or a
composition rule if composing with rule. We must set C1
@@ -1339,7 +1395,7 @@ encode_coding_iso2022 (coding, source, destination,
case EMACS_control_code:
if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER (0);
+ ENCODE_RESET_PLANE_AND_REGISTER;
*dst++ = c1;
break;
@@ -1347,7 +1403,7 @@ encode_coding_iso2022 (coding, source, destination,
if (!coding->selective)
{
if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER (0);
+ ENCODE_RESET_PLANE_AND_REGISTER;
*dst++ = c1;
break;
}
@@ -1355,7 +1411,11 @@ encode_coding_iso2022 (coding, source, destination,
case EMACS_linefeed_code:
if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
- ENCODE_RESET_PLANE_AND_REGISTER (1);
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
+ bcopy (coding->spec.iso2022.initial_designation,
+ coding->spec.iso2022.current_designation,
+ sizeof coding->spec.iso2022.initial_designation);
if (coding->eol_type == CODING_EOL_LF
|| coding->eol_type == CODING_EOL_AUTOMATIC)
*dst++ = ISO_CODE_LF;
@@ -1363,6 +1423,7 @@ encode_coding_iso2022 (coding, source, destination,
*dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
else
*dst++ = ISO_CODE_CR;
+ CODING_SPEC_ISO_BOL (coding) = 1;
break;
case EMACS_leading_code_2:
@@ -1418,7 +1479,7 @@ encode_coding_iso2022 (coding, source, destination,
the text although they are not valid characters. */
if (coding->last_block)
{
- ENCODE_RESET_PLANE_AND_REGISTER (1);
+ ENCODE_RESET_PLANE_AND_REGISTER;
bcopy(src, dst, src_end - src);
dst += (src_end - src);
src = src_end;
@@ -1985,11 +2046,10 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes, consumed)
return 0. */
int
-setup_coding_system (coding_system_symbol, coding)
- Lisp_Object coding_system_symbol;
+setup_coding_system (coding_system, coding)
+ Lisp_Object coding_system;
struct coding_system *coding;
{
- Lisp_Object coding_system_vector = Qnil;
Lisp_Object type, eol_type;
/* At first, set several fields default values. */
@@ -1999,45 +2059,29 @@ setup_coding_system (coding_system_symbol, coding)
coding->composing = 0;
coding->direction = 0;
coding->carryover_size = 0;
- coding->symbol = Qnil;
coding->post_read_conversion = coding->pre_write_conversion = Qnil;
- /* Get value of property `coding-system'. If it is a Lisp symbol
- pointing another coding system, fetch its property until we get a
- vector. */
- while (!NILP (coding_system_symbol))
+ Vlast_coding_system_used = coding->symbol = coding_system;
+ eol_type = Qnil;
+ /* Get value of property `coding-system' until we get a vector.
+ While doing that, also get values of properties
+ `post-read-conversion', `pre-write-conversion', and `eol-type'. */
+ while (!NILP (coding_system) && SYMBOLP (coding_system))
{
- coding->symbol = coding_system_symbol;
if (NILP (coding->post_read_conversion))
- coding->post_read_conversion = Fget (coding_system_symbol,
+ coding->post_read_conversion = Fget (coding_system,
Qpost_read_conversion);
- if (NILP (coding->pre_write_conversion))
- coding->pre_write_conversion = Fget (coding_system_symbol,
+ if (NILP (coding->pre_write_conversion))
+ coding->pre_write_conversion = Fget (coding_system,
Qpre_write_conversion);
-
- coding_system_vector = Fget (coding_system_symbol, Qcoding_system);
- if (VECTORP (coding_system_vector))
- break;
- coding_system_symbol = coding_system_vector;
+ if (NILP (eol_type))
+ eol_type = Fget (coding_system, Qeol_type);
+ coding_system = Fget (coding_system, Qcoding_system);
}
- Vlast_coding_system_used = coding->symbol;
-
- if (!VECTORP (coding_system_vector)
- || XVECTOR (coding_system_vector)->size != 5)
+ if (!VECTORP (coding_system)
+ || XVECTOR (coding_system)->size != 5)
goto label_invalid_coding_system;
- /* Get value of property `eol-type' by searching from the root
- coding-system. */
- coding_system_symbol = coding->symbol;
- eol_type = Qnil;
- while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
- {
- eol_type = Fget (coding_system_symbol, Qeol_type);
- if (!NILP (eol_type))
- break;
- coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
- }
-
if (VECTORP (eol_type))
coding->eol_type = CODING_EOL_AUTOMATIC;
else if (XFASTINT (eol_type) == 1)
@@ -2047,7 +2091,7 @@ setup_coding_system (coding_system_symbol, coding)
else
coding->eol_type = CODING_EOL_LF;
- type = XVECTOR (coding_system_vector)->contents[0];
+ type = XVECTOR (coding_system)->contents[0];
switch (XFASTINT (type))
{
case 0:
@@ -2061,7 +2105,7 @@ setup_coding_system (coding_system_symbol, coding)
case 2:
coding->type = coding_type_iso2022;
{
- Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+ Lisp_Object val = XVECTOR (coding_system)->contents[4];
Lisp_Object *flags;
int i, charset, default_reg_bits = 0;
@@ -2078,7 +2122,9 @@ setup_coding_system (coding_system_symbol, coding)
| (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
| (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
| (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
- | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION));
+ | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
+ | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
+ | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
/* Invoke graphic register 0 to plane 0. */
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
@@ -2087,6 +2133,8 @@ setup_coding_system (coding_system_symbol, coding)
= (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
/* Not single shifting at first. */
CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
+ /* Beginning of buffer should also be regarded as bol. */
+ CODING_SPEC_ISO_BOL(coding) = 1;
/* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
FLAGS[REG] can be one of below:
@@ -2103,7 +2151,8 @@ setup_coding_system (coding_system_symbol, coding)
for (i = 0; i < 4; i++)
{
if (INTEGERP (flags[i])
- && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
+ && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
+ || (charset = get_charset_id (flags[i])) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
@@ -2119,7 +2168,8 @@ setup_coding_system (coding_system_symbol, coding)
if (INTEGERP (XCONS (tail)->car)
&& (charset = XINT (XCONS (tail)->car),
- CHARSET_VALID_P (charset)))
+ CHARSET_VALID_P (charset))
+ || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
{
CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
@@ -2131,7 +2181,8 @@ setup_coding_system (coding_system_symbol, coding)
{
if (INTEGERP (XCONS (tail)->car)
&& (charset = XINT (XCONS (tail)->car),
- CHARSET_VALID_P (charset)))
+ CHARSET_VALID_P (charset))
+ || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
= i;
else if (EQ (XCONS (tail)->car, Qt))
@@ -2190,7 +2241,7 @@ setup_coding_system (coding_system_symbol, coding)
case 3:
coding->type = coding_type_big5;
coding->flags
- = (NILP (XVECTOR (coding_system_vector)->contents[4])
+ = (NILP (XVECTOR (coding_system)->contents[4])
? CODING_FLAG_BIG5_HKU
: CODING_FLAG_BIG5_ETEN);
break;
@@ -2198,7 +2249,7 @@ setup_coding_system (coding_system_symbol, coding)
case 4:
coding->type = coding_type_ccl;
{
- Lisp_Object val = XVECTOR (coding_system_vector)->contents[4];
+ Lisp_Object val = XVECTOR (coding_system)->contents[4];
if (CONSP (val)
&& VECTORP (XCONS (val)->car)
&& VECTORP (XCONS (val)->cdr))
@@ -2223,6 +2274,8 @@ setup_coding_system (coding_system_symbol, coding)
label_invalid_coding_system:
coding->type = coding_type_no_conversion;
+ coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
+ = Qnil;
return -1;
}
@@ -2236,52 +2289,52 @@ setup_coding_system (coding_system_symbol, coding)
The category for a coding system which has the same code range
as Emacs' internal format. Assigned the coding-system (Lisp
- symbol) `coding-system-internal' by default.
+ symbol) `internal' by default.
o coding-category-sjis
The category for a coding system which has the same code range
as SJIS. Assigned the coding-system (Lisp
- symbol) `coding-system-sjis' by default.
+ symbol) `shift-jis' by default.
o coding-category-iso-7
The category for a coding system which has the same code range
as ISO2022 of 7-bit environment. Assigned the coding-system
- (Lisp symbol) `coding-system-junet' by default.
+ (Lisp symbol) `iso-2022-7' by default.
o coding-category-iso-8-1
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
for DIMENSION1 charset. Assigned the coding-system (Lisp
- symbol) `coding-system-ctext' by default.
+ symbol) `iso-8859-1' by default.
o coding-category-iso-8-2
The category for a coding system which has the same code range
as ISO2022 of 8-bit environment and graphic plane 1 used only
for DIMENSION2 charset. Assigned the coding-system (Lisp
- symbol) `coding-system-euc-japan' by default.
+ symbol) `euc-japan' by default.
o coding-category-iso-else
The category for a coding system which has the same code range
as ISO2022 but not belongs to any of the above three
categories. Assigned the coding-system (Lisp symbol)
- `coding-system-iso-2022-ss2-7' by default.
+ `iso-2022-ss2-7' by default.
o coding-category-big5
The category for a coding system which has the same code range
as BIG5. Assigned the coding-system (Lisp symbol)
- `coding-system-big5' by default.
+ `cn-big5' by default.
o coding-category-binary
The category for a coding system not categorized in any of the
above. Assigned the coding-system (Lisp symbol)
- `coding-system-noconv' by default.
+ `no-conversion' by default.
Each of them is a Lisp symbol and the value is an actual
`coding-system's (this is also a Lisp symbol) assigned by a user.
@@ -2549,7 +2602,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
{
unsigned char *p = destination, *pend = destination + produced;
while (p < pend)
- if (*p++ = '\015') p[-1] = '\n';
+ if (*p++ == '\015') p[-1] = '\n';
}
}
*consumed = produced;
@@ -2687,23 +2740,26 @@ See document of make-coding-system for coding-system object.")
DEFUN ("read-non-nil-coding-system",
Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
- "Read a coding-system from the minibuffer, prompting with string PROMPT.")
+ "Read a coding system from the minibuffer, prompting with string PROMPT.")
(prompt)
Lisp_Object prompt;
{
- return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
- Qt, Qnil, Qnil),
- Qnil);
+ Lisp_Object val;
+ do {
+ val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
+ Qt, Qnil, Qnil);
+ } while (XSTRING (val)->size == 0);
+ return (Fintern (val, Qnil));
}
DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
- "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.")
+ "Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
(prompt)
Lisp_Object prompt;
{
- return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
- Qt, Qnil, Qnil),
- Qnil);
+ Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
+ Qt, Qnil, Qnil);
+ return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
}
DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
@@ -2726,7 +2782,7 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2, 2, 0,
"Detect coding-system of the text in the region between START and END.\n\
Return a list of possible coding-systems ordered by priority.\n\
-If only ASCII characters are found, it returns `coding-system-automatic'\n\
+If only ASCII characters are found, it returns `automatic-conversion'\n\
or its subsidiary coding-system according to a detected end-of-line format.")
(b, e)
Lisp_Object b, e;
@@ -2744,7 +2800,7 @@ If only ASCII characters are found, it returns `coding-system-automatic'\n\
if (coding_mask == CODING_CATEGORY_MASK_ANY)
{
- val = intern ("coding-system-automatic");
+ val = intern ("automatic-conversion");
if (eol_type != CODING_EOL_AUTOMATIC)
{
Lisp_Object val2 = Fget (val, Qeol_type);
@@ -2823,9 +2879,24 @@ shrink_conversion_area (begp, endp, coding, encodep)
case coding_type_ccl:
/* We can't skip any data. */
return;
+ case coding_type_iso2022:
+ if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
+ {
+ unsigned char *bol = beg_addr;
+ while (beg_addr < end_addr && *beg_addr < 0x80)
+ {
+ beg_addr++;
+ if (*(beg_addr - 1) == '\n')
+ bol = beg_addr;
+ }
+ beg_addr = bol;
+ goto label_skip_tail;
+ }
+ /* fall down ... */
default:
/* We can skip all ASCII characters at the head and tail. */
while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
+ label_skip_tail:
while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
break;
}
@@ -2974,8 +3045,8 @@ code_convert_region (b, e, coding, encodep)
}
Lisp_Object
-code_convert_string (str, coding, encodep)
- Lisp_Object str;
+code_convert_string (str, coding, encodep, nocopy)
+ Lisp_Object str, nocopy;
struct coding_system *coding;
int encodep;
{
@@ -3014,7 +3085,7 @@ code_convert_string (str, coding, encodep)
if (begp == endp)
/* We need no conversion. */
- return str;
+ return (NILP (nocopy) ? Fcopy_sequence (str) : str);
head_skip = begp - XSTRING (str)->data;
tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
@@ -3044,8 +3115,10 @@ code_convert_string (str, coding, encodep)
}
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
- 3, 3, 0,
- "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\
+ 3, 3, "r\nzCoding system: ",
+ "Decode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM. START END are buffer positions.\n\
Return length of decoded text.")
(b, e, coding_system)
Lisp_Object b, e, coding_system;
@@ -3056,6 +3129,8 @@ Return length of decoded text.")
CHECK_NUMBER_COERCE_MARKER (e, 1);
CHECK_SYMBOL (coding_system, 2);
+ if (NILP (coding_system))
+ return make_number (XFASTINT (e) - XFASTINT (b));
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
@@ -3063,8 +3138,10 @@ Return length of decoded text.")
}
DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
- 3, 3, 0,
- "Encode the text between START and END to CODING-SYSTEM.\n\
+ 3, 3, "r\nzCoding system: ",
+ "Encode current region by specified coding system.\n\
+When called from a program, takes three arguments:\n\
+START, END, and CODING-SYSTEM. START END are buffer positions.\n\
Return length of encoded text.")
(b, e, coding_system)
Lisp_Object b, e, coding_system;
@@ -3075,6 +3152,8 @@ Return length of encoded text.")
CHECK_NUMBER_COERCE_MARKER (e, 1);
CHECK_SYMBOL (coding_system, 2);
+ if (NILP (coding_system))
+ return make_number (XFASTINT (e) - XFASTINT (b));
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
@@ -3082,41 +3161,49 @@ Return length of encoded text.")
}
DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
- 2, 2, 0,
- "Decode STRING which is encoded in CODING-SYSTEM, and return the result.")
- (string, coding_system)
- Lisp_Object string, coding_system;
+ 2, 3, 0,
+ "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of decoding.")
+ (string, coding_system, nocopy)
+ Lisp_Object string, coding_system, nocopy;
{
struct coding_system coding;
CHECK_STRING (string, 0);
CHECK_SYMBOL (coding_system, 1);
+ if (NILP (coding_system))
+ return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
- return code_convert_string (string, &coding, 0);
+ return code_convert_string (string, &coding, 0, nocopy);
}
DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
- 2, 2, 0,
- "Encode STRING to CODING-SYSTEM, and return the result.")
- (string, coding_system)
- Lisp_Object string, coding_system;
+ 2, 3, 0,
+ "Encode STRING to CODING-SYSTEM, and return the result.\n\
+Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
+of encoding.")
+ (string, coding_system, nocopy)
+ Lisp_Object string, coding_system, nocopy;
{
struct coding_system coding;
CHECK_STRING (string, 0);
CHECK_SYMBOL (coding_system, 1);
+ if (NILP (coding_system))
+ return (NILP (nocopy) ? Fcopy_sequence (string) : string);
if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
- return code_convert_string (string, &coding, 1);
+ return code_convert_string (string, &coding, 1, nocopy);
}
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
- "Decode a JISX0208 character of SJIS coding-system-sjis.\n\
+ "Decode a JISX0208 character of shift-jis encoding.\n\
CODE is the character code in SJIS.\n\
Return the corresponding character.")
(code)
@@ -3255,7 +3342,7 @@ For each OPERATION, TARGET is selected from the arguments as below:\n\
\n\
The return value is a cons of coding systems for decoding and encoding\n\
registered in nested alist `coding-system-alist' (which see) at a slot\n\
-corresponding to OPERATION and TARGET.
+corresponding to OPERATION and TARGET.\n\
If a function symbol is at the slot, return a result of the function call.\n\
The function is called with one argument, a list of all the arguments.")
(nargs, args)
@@ -3346,6 +3433,39 @@ init_coding_once ()
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
+ conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
+ conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
+
+ setup_coding_system (Qnil, &keyboard_coding);
+ setup_coding_system (Qnil, &terminal_coding);
+}
+
+#ifdef emacs
+
+syms_of_coding ()
+{
+ Qtarget_idx = intern ("target-idx");
+ staticpro (&Qtarget_idx);
+
+ Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
+ Fput (Qwrite_region, Qtarget_idx, make_number (2));
+
+ Qcall_process = intern ("call-process");
+ staticpro (&Qcall_process);
+ Fput (Qcall_process, Qtarget_idx, make_number (0));
+
+ Qcall_process_region = intern ("call-process-region");
+ staticpro (&Qcall_process_region);
+ Fput (Qcall_process_region, Qtarget_idx, make_number (2));
+
+ Qstart_process = intern ("start-process");
+ staticpro (&Qstart_process);
+ Fput (Qstart_process, Qtarget_idx, make_number (2));
+
+ Qopen_network_stream = intern ("open-network-stream");
+ staticpro (&Qopen_network_stream);
+ Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
+
Qcoding_system = intern ("coding-system");
staticpro (&Qcoding_system);
@@ -3389,39 +3509,6 @@ init_coding_once ()
}
}
- conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
- conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
-
- setup_coding_system (Qnil, &keyboard_coding);
- setup_coding_system (Qnil, &terminal_coding);
-}
-
-#ifdef emacs
-
-syms_of_coding ()
-{
- Qtarget_idx = intern ("target-idx");
- staticpro (&Qtarget_idx);
-
- Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
- Fput (Qwrite_region, Qtarget_idx, make_number (2));
-
- Qcall_process = intern ("call-process");
- staticpro (&Qcall_process);
- Fput (Qcall_process, Qtarget_idx, make_number (0));
-
- Qcall_process_region = intern ("call-process-region");
- staticpro (&Qcall_process_region);
- Fput (Qcall_process_region, Qtarget_idx, make_number (2));
-
- Qstart_process = intern ("start-process");
- staticpro (&Qstart_process);
- Fput (Qstart_process, Qtarget_idx, make_number (2));
-
- Qopen_network_stream = intern ("open-network-stream");
- staticpro (&Qopen_network_stream);
- Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
-
defsubr (&Scoding_system_vector);
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
@@ -3472,7 +3559,7 @@ If not, an appropriate element in `coding-system-alist' (which see) is used.");
DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
"Nested alist to decide a coding system for a specific I/O operation.\n\
The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
-
+\n\
OPERATION is one of the following Emacs I/O primitives:\n\
For file I/O, insert-file-contents and write-region.\n\
For process I/O, call-process, call-process-region, and start-process.\n\