summary | refs | log | tree | commit | diff
path: root/src/coding.c
diff options
context:
space:
mode:
author Kenichi Handa <handa@m17n.org> 1998-09-26 04:20:48 +0000
committer Kenichi Handa <handa@m17n.org> 1998-09-26 04:20:48 +0000
commit de79a6a5ed49e728d1ee62efd9b1542cb72c095d (patch)
tree 3f8e9f4c034b93a63065244b5ab0b68d37fa10a2 /src/coding.c
parent 450c60a5597beb1aea1a549f53baece4e7d26983 (diff)
download emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.tar.gz
(check_composing_code): If the current composing
sequence doesn't end properly, return -1. (DECODE_CHARACTER_ASCII): Update coding->composed_chars. (DECODE_CHARACTER_DIMENSION1): Likewise. (decode_coding_iso2022): Check validity of a composing sequence. (code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion. (shrink_decoding_region): Fix previous change.
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 123
1 files changed, 82 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c
index fa2bbc620a0..5c3299b6b56 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
/* Decode one ASCII character C. */
-#define DECODE_CHARACTER_ASCII(c) \
- do { \
- if (COMPOSING_P (coding->composing)) \
- *dst++ = 0xA0, *dst++ = (c) | 0x80; \
- else \
- { \
- *dst++ = (c); \
- coding->produced_char++; \
- } \
+#define DECODE_CHARACTER_ASCII(c) \
+ do { \
+ if (COMPOSING_P (coding->composing)) \
+ { \
+ *dst++ = 0xA0, *dst++ = (c) | 0x80; \
+ coding->composed_chars++; \
+ } \
+ else \
+ { \
+ *dst++ = (c); \
+ coding->produced_char++; \
+ } \
} while (0)
/* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
do { \
unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
if (COMPOSING_P (coding->composing)) \
- *dst++ = leading_code + 0x20; \
+ { \
+ *dst++ = leading_code + 0x20; \
+ coding->composed_chars++; \
+ } \
else \
{ \
*dst++ = leading_code; \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
invalid_code_found = 1;
}
}
- return (invalid_code_found
- ? src - src_start
- : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+ return (invalid_code_found ? src - src_start : -1);
}
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
translation_table = Vstandard_translation_table_for_decode;
coding->produced_char = 0;
+ coding->composed_chars = 0;
coding->fake_multibyte = 0;
while (src < src_end && (dst_bytes
? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
coding->composing = (c1 == '0'
? COMPOSING_NO_RULE_HEAD
: COMPOSING_WITH_RULE_HEAD);
- coding->produced_char++;
+ coding->composed_chars = 0;
}
else if (result1 > 0)
{
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
src += result1;
dst += result1 + 2;
coding->produced_char += result1 + 2;
+ coding->fake_multibyte = 1;
}
else
{
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
break;
case '1': /* end composing */
+ if (coding->composed_chars > 0)
+ {
+ if (coding->composed_chars == 1)
+ {
+ unsigned char *this_char_start = dst;
+ int this_bytes;
+
+ /* Only one character is in the composing
+ sequence. Make it a normal character. */
+ while (*--this_char_start != LEADING_CODE_COMPOSITION);
+ dst = (this_char_start
+ + (coding->composing == COMPOSING_NO_RULE_TAIL
+ ? 1 : 2));
+ *dst -= 0x20;
+ if (*dst == 0x80)
+ *++dst &= 0x7F;
+ this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+ while (this_bytes--) *this_char_start++ = *dst++;
+ dst = this_char_start;
+ }
+ coding->produced_char++;
+ }
coding->composing = COMPOSING_NO;
break;
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
case CODING_CATEGORY_IDX_ISO_7:
case CODING_CATEGORY_IDX_ISO_7_TIGHT:
- /* We can skip all charactes at the tail except for ESC and
- the following 2-byte at the tail. */
- if (eol_conversion)
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
- endp--;
- else
- while (begp < endp
- && (c = endp[-1]) != ISO_CODE_ESC)
- endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- if (begp < endp && endp[-1] == ISO_CODE_ESC)
- {
- if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
- /* This is an ASCII designation sequence. We can
- surely skip the tail. */
- endp += 2;
- else
- /* Hmmm, we can't skip the tail. */
- endp = endp_orig;
- }
+ {
+ /* We can skip all charactes at the tail except for 8-bit
+ codes and ESC and the following 2-byte at the tail. */
+ unsigned char *eight_bit = NULL;
+
+ if (eol_conversion)
+ while (begp < endp
+ && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+ {
+ if (!eight_bit && c & 0x80) eight_bit = endp;
+ endp--;
+ }
+ else
+ while (begp < endp
+ && (c = endp[-1]) != ISO_CODE_ESC)
+ {
+ if (!eight_bit && c & 0x80) eight_bit = endp;
+ endp--;
+ }
+ /* Do not consider LF as ascii if preceded by CR, since that
+ confuses eol decoding. */
+ if (begp < endp && endp < endp_orig
+ && endp[-1] == '\r' && endp[0] == '\n')
+ endp++;
+ if (begp < endp && endp[-1] == ISO_CODE_ESC)
+ {
+ if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+ /* This is an ASCII designation sequence. We can
+ surely skip the tail. But, if we have
+ encountered an 8-bit code, skip only the codes
+ after that. */
+ endp = eight_bit ? eight_bit : endp + 2;
+ else
+ /* Hmmm, we can't skip the tail. */
+ endp = endp_orig;
+ }
+ else if (eight_bit)
+ endp = eight_bit;
+ }
}
}
*beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
else
shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
}
- if (from == to_byte
- && ! (coding->mode & CODING_MODE_LAST_BLOCK
- && CODING_REQUIRE_FLUSHING (coding)))
+ if (from == to_byte)
return (nocopy ? str : Fcopy_sequence (str));
if (encodep)