summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: crvi <crvisqr@gmail.com> 2021-03-18 19:03:47 +0530
committer: Bastien Nocera <hadess@hadess.net> 2021-06-24 16:18:47 +0200
commit: e84cf43c7080774f5148a82e33118ad3f902a003 (patch)
tree: a5d82a4314a71c7d590d519a5dbae28bf39e1202
parent: e85f88f82cbec9c6e503ebdcc6cb8549883a3cf9 (diff)
download: totem-pl-parser-e84cf43c7080774f5148a82e33118ad3f902a003.tar.gz
plparser: Print illegal char byte sequence info during utf-8 conversion error
This is useful for debugging invalid XML documents. It provides the following debug information: Invalid byte sequence in conversion input: byte offset 22493, byte: '\xe2', byte context: 'ience from Valentine\xe2'
-rw-r--r-- plparse/totem-pl-parser.c 22
1 file changed, 18 insertions, 4 deletions
diff --git a/plparse/totem-pl-parser.c b/plparse/totem-pl-parser.c
index 534d268..4656f6e 100644
--- a/plparse/totem-pl-parser.c
+++ b/plparse/totem-pl-parser.c
@@ -159,6 +159,7 @@
#define READ_CHUNK_SIZE 8192
#define RECURSE_LEVEL_MAX 4
+#define ILLEGAL_CONTEXT_LENGTH 20
#define D(x) if (debug) x
@@ -1885,7 +1886,7 @@ totem_pl_parser_parse_xml_relaxed (char *contents,
g_autoptr(GError) error = NULL;
g_autofree char *encoding = NULL;
g_autofree char *new_contents = NULL;
- gsize new_size;
+ gsize new_size, bytes_read;
xml_parser_t *xml_parser;
totem_pl_parser_cleanup_xml (contents);
@@ -1921,10 +1922,23 @@ totem_pl_parser_parse_xml_relaxed (char *contents,
xml_parser_free_tree (doc);
- new_contents = g_convert (contents, size, "UTF-8", encoding, NULL, &new_size, &error);
+ new_contents = g_convert (contents, size, "UTF-8", encoding, &bytes_read, &new_size, &error);
if (new_contents == NULL) {
- g_warning ("Failed to convert XML data from '%s' to '%s': %s",
- encoding, "UTF-8", error->message);
+ g_autofree char *message = NULL;
+ message = g_strdup_printf ("Failed to convert XML data from '%s' to '%s': %s",
+ encoding, "UTF-8", error->message);
+
+ if (error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE) {
+ int context_length = MIN (bytes_read, ILLEGAL_CONTEXT_LENGTH);
+
+ g_warning ("%s: byte offset %" G_GSIZE_FORMAT ", byte: '%.1s', byte context: '%.*s'",
+ message, bytes_read, contents + bytes_read,
+ context_length + 1,
+ contents + bytes_read - context_length);
+ } else {
+ g_warning ("%s", message);
+ }
+
return NULL;
}