diff options
author | Chris Loer <chris.loer@gmail.com> | 2018-06-27 15:01:54 -0700 |
---|---|---|
committer | Chris Loer <chris.loer@mapbox.com> | 2018-07-03 10:03:05 -0700 |
commit | 35256c6e5bb1c217fde45c3e89b0db259d9c9f9b (patch) | |
tree | 29c94cbd8052c3691bff958d0d175ded7ea5a17d /platform/default | |
parent | 9ff5d34ef2ed2a236cc495f0ad84919cedce9abc (diff) | |
download | qtlocation-mapboxgl-35256c6e5bb1c217fde45c3e89b0db259d9c9f9b.tar.gz |
[core] Default "collator" implementation
- Based on nunicode
- Not locale-aware
- Used by linux and Qt builds
Diffstat (limited to 'platform/default')
-rw-r--r-- | platform/default/collator.cpp | 79 | ||||
-rw-r--r-- | platform/default/unaccent.cpp | 43 | ||||
-rw-r--r-- | platform/default/unaccent.hpp | 13 |
3 files changed, 135 insertions, 0 deletions
diff --git a/platform/default/collator.cpp b/platform/default/collator.cpp new file mode 100644 index 0000000000..b7f256756e --- /dev/null +++ b/platform/default/collator.cpp @@ -0,0 +1,79 @@ +#include <mbgl/style/expression/collator.hpp> +#include <mbgl/util/platform.hpp> +#include <libnu/strcoll.h> +#include <unaccent.hpp> + +/* + The default implementation of Collator ignores locale. + Case sensitivity and collation order are based on + Default Unicode Collation Element Table (DUCET). + + Diacritic-insensitivity is implemented with nunicode's + non-standard "unaccent" functionality, which is tailored + to European languages. + + It would be possible to implement locale awareness using ICU, + but would require bundling locale data. +*/ + +namespace mbgl { +namespace style { +namespace expression { + +class Collator::Impl { +public: + Impl(bool caseSensitive_, bool diacriticSensitive_, optional<std::string>) + : caseSensitive(caseSensitive_) + , diacriticSensitive(diacriticSensitive_) + {} + + bool operator==(const Impl& other) const { + return caseSensitive == other.caseSensitive && + diacriticSensitive == other.diacriticSensitive; + } + + int compare(const std::string& lhs, const std::string& rhs) const { + if (caseSensitive && diacriticSensitive) { + return nu_strcoll(lhs.c_str(), rhs.c_str(), + nu_utf8_read, nu_utf8_read); + } else if (!caseSensitive && diacriticSensitive) { + return nu_strcasecoll(lhs.c_str(), rhs.c_str(), + nu_utf8_read, nu_utf8_read); + } else if (caseSensitive && !diacriticSensitive) { + return nu_strcoll(platform::unaccent(lhs).c_str(), platform::unaccent(rhs).c_str(), + nu_utf8_read, nu_utf8_read); + } else { + return nu_strcasecoll(platform::unaccent(lhs).c_str(), platform::unaccent(rhs).c_str(), + nu_utf8_read, nu_utf8_read); + } + } + + std::string resolvedLocale() const { + return ""; + } +private: + bool caseSensitive; + bool diacriticSensitive; +}; + + +Collator::Collator(bool caseSensitive, bool diacriticSensitive, optional<std::string> locale_) + : impl(std::make_shared<Impl>(caseSensitive, diacriticSensitive, std::move(locale_))) +{} + +bool Collator::operator==(const Collator& other) const { + return *impl == *(other.impl); +} + +int Collator::compare(const std::string& lhs, const std::string& rhs) const { + return impl->compare(lhs, rhs); +} + +std::string Collator::resolvedLocale() const { + return impl->resolvedLocale(); +} + + +} // namespace expression +} // namespace style +} // namespace mbgl diff --git a/platform/default/unaccent.cpp b/platform/default/unaccent.cpp new file mode 100644 index 0000000000..faefb4b4cd --- /dev/null +++ b/platform/default/unaccent.cpp @@ -0,0 +1,43 @@ +#include <mbgl/util/platform.hpp> +#include <libnu/unaccent.h> +#include <unaccent.hpp> + +#include <cstring> +#include <sstream> + +namespace mbgl { namespace platform { + +std::string unaccent(const std::string& str) +{ + std::stringstream output; + char const *itr = str.c_str(), *nitr; + char const *end = itr + str.length(); + char lo[5] = { 0 }; + + for (; itr < end; itr = nitr) + { + uint32_t code_point = 0; + char const* buf = nullptr; + + nitr = _nu_tounaccent(itr, end, nu_utf8_read, &code_point, &buf, nullptr); + if (buf != nullptr) + { + do + { + buf = NU_CASEMAP_DECODING_FUNCTION(buf, &code_point); + if (code_point == 0) break; + output.write(lo, nu_utf8_write(code_point, lo) - lo); + } + while (code_point != 0); + } + else + { + output.write(itr, nitr - itr); + } + } + + return output.str(); +} + +} // namespace platform +} // namespace mbgl diff --git a/platform/default/unaccent.hpp b/platform/default/unaccent.hpp new file mode 100644 index 0000000000..85ac37a7de --- /dev/null +++ b/platform/default/unaccent.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <string> + +namespace mbgl { +namespace platform { + +// Non-locale-aware diacritic folding based on nunicode +// Used as a fallback when locale-aware comparisons aren't available +std::string unaccent(const std::string &string); + +} // namespace platform +} // namespace mbgl |