From 35256c6e5bb1c217fde45c3e89b0db259d9c9f9b Mon Sep 17 00:00:00 2001
From: Chris Loer
Date: Wed, 27 Jun 2018 15:01:54 -0700
Subject: [core] Default "collator" implementation

- Based on nunicode
- Not locale-aware
- Used by linux and Qt builds
---
 platform/default/collator.cpp | 79 +++++++++++++++++++++++++++++++++++++++++++
 platform/default/unaccent.cpp | 43 +++++++++++++++++++++++
 platform/default/unaccent.hpp | 13 +++++++
 3 files changed, 135 insertions(+)
 create mode 100644 platform/default/collator.cpp
 create mode 100644 platform/default/unaccent.cpp
 create mode 100644 platform/default/unaccent.hpp

(limited to 'platform/default')

diff --git a/platform/default/collator.cpp b/platform/default/collator.cpp
new file mode 100644
index 0000000000..b7f256756e
--- /dev/null
+++ b/platform/default/collator.cpp
@@ -0,0 +1,79 @@
+#include
+#include
+#include
+#include
+
+/*
+    The default implementation of Collator ignores locale.
+    Case sensitivity and collation order are based on
+    Default Unicode Collation Element Table (DUCET).
+
+    Diacritic-insensitivity is implemented with nunicode's
+    non-standard "unaccent" functionality, which is tailored
+    to European languages.
+
+    It would be possible to implement locale awareness using ICU,
+    but would require bundling locale data.
+*/
+
+namespace mbgl {
+namespace style {
+namespace expression {
+
+// Locale-agnostic collation state; the requested locale is accepted but unused.
+class Collator::Impl {
+public:
+    Impl(bool caseSensitive_, bool diacriticSensitive_, optional<std::string>)
+        : caseSensitive(caseSensitive_)
+        , diacriticSensitive(diacriticSensitive_)
+    {}
+
+    bool operator==(const Impl& other) const {
+        return caseSensitive == other.caseSensitive &&
+            diacriticSensitive == other.diacriticSensitive;
+    }
+
+    // Collates lhs against rhs, folding diacritics first when insensitive to them.
+    int compare(const std::string& lhs, const std::string& rhs) const {
+        if (diacriticSensitive) {
+            return collate(lhs, rhs);
+        }
+        return collate(platform::unaccent(lhs), platform::unaccent(rhs));
+    }
+
+    // Always empty: this implementation does not resolve a locale.
+    std::string resolvedLocale() const {
+        return "";
+    }
+private:
+    // Picks nunicode's case-sensitive or case-insensitive collation routine.
+    int collate(const std::string& lhs, const std::string& rhs) const {
+        return caseSensitive
+            ? nu_strcoll(lhs.c_str(), rhs.c_str(), nu_utf8_read, nu_utf8_read)
+            : nu_strcasecoll(lhs.c_str(), rhs.c_str(), nu_utf8_read, nu_utf8_read);
+    }
+
+    bool caseSensitive;
+    bool diacriticSensitive;
+};
+
+// Collator forwards every operation to its Impl.
+Collator::Collator(bool caseSensitive, bool diacriticSensitive, optional<std::string> locale_)
+    : impl(std::make_shared<Impl>(caseSensitive, diacriticSensitive, std::move(locale_)))
+{}
+
+bool Collator::operator==(const Collator& other) const {
+    return *impl == *(other.impl);
+}
+
+int Collator::compare(const std::string& lhs, const std::string& rhs) const {
+    return impl->compare(lhs, rhs);
+}
+
+std::string Collator::resolvedLocale() const {
+    return impl->resolvedLocale();
+}
+
+} // namespace expression
+} // namespace style
+} // namespace mbgl
diff --git a/platform/default/unaccent.cpp b/platform/default/unaccent.cpp
new file mode 100644
index 0000000000..faefb4b4cd
--- /dev/null
+++ b/platform/default/unaccent.cpp
@@ -0,0 +1,43 @@
+#include
+#include
+#include
+
+#include
+#include
+
+namespace mbgl { namespace platform {
+
+// Returns a copy of `str` with diacritics folded via nunicode's unaccent
+// transform. Input is walked one nu_utf8_read character at a time; characters
+// for which nunicode supplies no replacement are copied through unchanged.
+std::string unaccent(const std::string& str) {
+    std::string folded;
+    folded.reserve(str.length());
+    char encoded[5] = { 0 }; // scratch space for one encoded code point
+
+    const char* const last = str.c_str() + str.length();
+    const char* next = nullptr;
+    for (const char* cursor = str.c_str(); cursor < last; cursor = next) {
+        uint32_t code_point = 0;
+        const char* replacement = nullptr;
+        next = _nu_tounaccent(cursor, last, nu_utf8_read, &code_point, &replacement, nullptr);
+
+        if (replacement == nullptr) {
+            // No replacement was supplied: keep this character's original bytes.
+            folded.append(cursor, next);
+            continue;
+        }
+
+        // Decode replacement code points up to the 0 terminator, writing each out.
+        for (;;) {
+            replacement = NU_CASEMAP_DECODING_FUNCTION(replacement, &code_point);
+            if (code_point == 0) break;
+            folded.append(encoded, nu_utf8_write(code_point, encoded));
+        }
+    }
+
+    return folded;
+}
+
+} // namespace platform
+} // namespace mbgl
diff --git a/platform/default/unaccent.hpp b/platform/default/unaccent.hpp
new file mode 100644
index 0000000000..85ac37a7de
--- /dev/null
+++ b/platform/default/unaccent.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+
+namespace mbgl {
+namespace platform {
+
+// Non-locale-aware diacritic folding based on nunicode
+// Used as a fallback when locale-aware comparisons aren't available
+std::string unaccent(const std::string &string);
+
+} // namespace platform
+} // namespace mbgl
-- 
cgit v1.2.1