diff options
author | Krzysztof Piecuch <piecuch@protonmail.com> | 2017-03-15 02:13:15 +0100 |
---|---|---|
committer | Murray Cumming <murrayc@murrayc.com> | 2017-03-15 10:00:26 +0100 |
commit | 0797bf2954177f58b7ac6ebecce7264310481c55 (patch) | |
tree | ff9c02675aeafdf22b4fc254d2a0b413e779f0cd /tests | |
parent | e3465005619f8428c84b6c53cfd73d2dbab5ed4c (diff) | |
download | glibmm-0797bf2954177f58b7ac6ebecce7264310481c55.tar.gz |
Added ustring::make_valid() which fixes non-UTF8 strings.
make_valid() replaces every byte that is not valid UTF-8 with the
replacement character (U+FFFD). This lets you keep working with a ustring
after ustring::validate() has told you that it is not a valid UTF-8 string
and you need to rescue it somehow.
This wraps g_utf8_make_valid().
Bug #780075
Diffstat (limited to 'tests')
-rw-r--r-- | tests/Makefile.am | 4 | ||||
-rw-r--r-- | tests/glibmm_ustring_make_valid/main.cc | 58 |
2 files changed, 61 insertions, 1 deletions
diff --git a/tests/Makefile.am b/tests/Makefile.am index bbe85ae4..4a0d2fe5 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -48,7 +48,8 @@ check_PROGRAMS = \ glibmm_refptr/test \ glibmm_refptr_sigc_bind/test \ glibmm_weakref/test \ - glibmm_bytearray/test + glibmm_bytearray/test \ + glibmm_ustring_make_valid/test TESTS = $(check_PROGRAMS) @@ -123,3 +124,4 @@ glibmm_refptr_sigc_bind_test_SOURCES = glibmm_refptr_sigc_bind/main.cc glibmm_weakref_test_SOURCES = glibmm_weakref/main.cc glibmm_weakref_test_LDADD = $(giomm_ldadd) glibmm_bytearray_test_SOURCES = glibmm_bytearray/main.cc +glibmm_ustring_make_valid_test_SOURCES = glibmm_ustring_make_valid/main.cc diff --git a/tests/glibmm_ustring_make_valid/main.cc b/tests/glibmm_ustring_make_valid/main.cc new file mode 100644 index 00000000..3f941225 --- /dev/null +++ b/tests/glibmm_ustring_make_valid/main.cc @@ -0,0 +1,58 @@ +#include <iostream> +#include <glibmm.h> + +int +main() +{ + Glib::init(); + + // 0-1: bad character + const char not_utf8[] = { '\x80', + // 1-4: good three bytes (one character) + '\xef', '\x80', '\x80', + // 4-5: bad character + '\xef', + // 5-6: bad character + '\x80', + // 6-7: good character + 'a', + // 7-8: bad character + '\0', + // 8-9: good character + 'd', + // 9-10: bad character + '\x80', + // 10-13: good three bytes (one character) + '\xef', '\x80', '\x80', + // 13-15: two bad characters + '\xef', '\x80' + }; + + const char fixed_utf8[] = { '\xef', '\xbf', '\xbd', + '\xef', '\x80', '\x80', + '\xef', '\xbf', '\xbd', + '\xef', '\xbf', '\xbd', + 'a', + '\xef', '\xbf', '\xbd', + 'd', + '\xef', '\xbf', '\xbd', + '\xef', '\x80', '\x80', + '\xef', '\xbf', '\xbd', + '\xef', '\xbf', '\xbd' + }; + + // const char repl_character[] = {'\xef', '\xbf', '\xbd'}; + const Glib::ustring s(not_utf8, not_utf8 + sizeof not_utf8); + g_assert(s.validate() == false); + + const Glib::ustring good_one = s.make_valid(); + g_assert(s.validate() == false); // we make a copy + g_assert(good_one.validate()); 
// this one is good! + + const Glib::ustring correct_output(fixed_utf8, + fixed_utf8 + sizeof fixed_utf8); + g_assert(correct_output.validate()); + g_assert(correct_output == good_one); + + return EXIT_SUCCESS; +} |