diff options
author | Bram Moolenaar <Bram@vim.org> | 2008-06-04 17:37:34 +0000 |
---|---|---|
committer | Bram Moolenaar <Bram@vim.org> | 2008-06-04 17:37:34 +0000 |
commit | f453d35dbed66362221b6871c076e945e9d955b4 (patch) | |
tree | 7aef13dd5c618c10a89c096e0964af3dd396ae03 /src/fileio.c | |
parent | c4ea3f46e8b05b08cc239737f36e3618fc66c725 (diff) | |
download | vim-git-f453d35dbed66362221b6871c076e945e9d955b4.tar.gz |
updated for version 7.1-310 (v7.1.310)
Diffstat (limited to 'src/fileio.c')
-rw-r--r-- | src/fileio.c | 124 |
1 files changed, 80 insertions, 44 deletions
diff --git a/src/fileio.c b/src/fileio.c index 95970a3fd..de123a09c 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1288,12 +1288,49 @@ retry: #ifdef FEAT_MBYTE else if (conv_restlen > 0) { - /* Reached end-of-file but some trailing bytes could - * not be converted. Truncated file? */ - if (conv_error == 0) - conv_error = linecnt; - if (bad_char_behavior != BAD_DROP) + /* + * Reached end-of-file but some trailing bytes could + * not be converted. Truncated file? + */ + + /* When we did a conversion report an error. */ + if (fio_flags != 0 +# ifdef USE_ICONV + || iconv_fd != (iconv_t)-1 +# endif + ) { + if (conv_error == 0) + conv_error = curbuf->b_ml.ml_line_count + - linecnt + 1; + } + /* Remember the first linenr with an illegal byte */ + else if (illegal_byte == 0) + illegal_byte = curbuf->b_ml.ml_line_count + - linecnt + 1; + if (bad_char_behavior == BAD_DROP) + { + *(ptr - conv_restlen) = NUL; + conv_restlen = 0; + } + else + { + /* Replace the trailing bytes with the replacement + * character if we were converting; if we weren't, + * leave the UTF8 checking code to do it, as it + * works slightly differently. */ + if (bad_char_behavior != BAD_KEEP && (fio_flags != 0 +# ifdef USE_ICONV + || iconv_fd != (iconv_t)-1 +# endif + )) + { + while (conv_restlen > 0) + { + *(--ptr) = bad_char_behavior; + --conv_restlen; + } + } fio_flags = 0; /* don't convert this */ # ifdef USE_ICONV if (iconv_fd != (iconv_t)-1) @@ -1302,20 +1339,6 @@ retry: iconv_fd = (iconv_t)-1; } # endif - if (bad_char_behavior == BAD_KEEP) - { - /* Keep the trailing bytes as-is. */ - size = conv_restlen; - ptr -= conv_restlen; - } - else - { - /* Replace the trailing bytes with the - * replacement character. */ - size = 1; - *--ptr = bad_char_behavior; - } - conv_restlen = 0; } } #endif @@ -1397,6 +1420,11 @@ retry: goto retry; } } + + /* Include not converted bytes. */ + ptr -= conv_restlen; + size += conv_restlen; + conv_restlen = 0; #endif /* * Break here for a read error or end-of-file. 
@@ -1406,11 +1434,6 @@ retry: #ifdef FEAT_MBYTE - /* Include not converted bytes. */ - ptr -= conv_restlen; - size += conv_restlen; - conv_restlen = 0; - # ifdef USE_ICONV if (iconv_fd != (iconv_t)-1) { @@ -1872,12 +1895,12 @@ retry: size = (long)((ptr + real_size) - dest); ptr = dest; } - else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin) + else if (enc_utf8 && !curbuf->b_p_bin) { - /* Reading UTF-8: Check if the bytes are valid UTF-8. - * Need to start before "ptr" when part of the character was - * read in the previous read() call. */ - for (p = ptr - utf_head_off(buffer, ptr); ; ++p) + int incomplete_tail = FALSE; + + /* Reading UTF-8: Check if the bytes are valid UTF-8. */ + for (p = ptr; ; ++p) { int todo = (int)((ptr + size) - p); int l; @@ -1891,43 +1914,56 @@ retry: * read() will get the next bytes, we'll check it * then. */ l = utf_ptr2len_len(p, todo); - if (l > todo) + if (l > todo && !incomplete_tail) { - /* Incomplete byte sequence, the next read() - * should get them and check the bytes. */ - p += todo; - break; + /* Avoid retrying with a different encoding when + * a truncated file is more likely, or attempting + * to read the rest of an incomplete sequence when + * we have already done so. */ + if (p > ptr || filesize > 0) + incomplete_tail = TRUE; + /* Incomplete byte sequence, move it to conv_rest[] + * and try to read the rest of it, unless we've + * already done so. */ + if (p > ptr) + { + conv_restlen = todo; + mch_memmove(conv_rest, p, conv_restlen); + size -= conv_restlen; + break; + } } - if (l == 1) + if (l == 1 || l > todo) { /* Illegal byte. If we can try another encoding - * do that. */ - if (can_retry) + * do that, unless at EOF where a truncated + * file is more likely than a conversion error. 
*/ + if (can_retry && !incomplete_tail) break; - - /* Remember the first linenr with an illegal byte */ - if (illegal_byte == 0) - illegal_byte = readfile_linenr(linecnt, ptr, p); # ifdef USE_ICONV /* When we did a conversion report an error. */ if (iconv_fd != (iconv_t)-1 && conv_error == 0) conv_error = readfile_linenr(linecnt, ptr, p); # endif + /* Remember the first linenr with an illegal byte */ + if (conv_error == 0 && illegal_byte == 0) + illegal_byte = readfile_linenr(linecnt, ptr, p); /* Drop, keep or replace the bad byte. */ if (bad_char_behavior == BAD_DROP) { - mch_memmove(p, p+1, todo - 1); + mch_memmove(p, p + 1, todo - 1); --p; --size; } else if (bad_char_behavior != BAD_KEEP) *p = bad_char_behavior; } - p += l - 1; + else + p += l - 1; } } - if (p < ptr + size) + if (p < ptr + size && !incomplete_tail) { /* Detected a UTF-8 error. */ rewind_retry: |