1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
#include <mbgl/style/expression/collator.hpp>
#include <mbgl/util/platform.hpp>
#include <libnu/strcoll.h>
#include <libnu/unaccent.h>
#include <cstring>
#include <sstream>
/*
The default implementation of Collator ignores locale.
Case sensitivity and collation order are based on the
Default Unicode Collation Element Table (DUCET).
Diacritic-insensitivity is implemented with nunicode's
non-standard "unaccent" functionality, which is tailored
to European languages.
It would be possible to implement locale awareness using ICU,
but that would require bundling locale data.
*/
namespace {
// Returns a copy of `str` with nunicode's "unaccent" mapping applied.
//
// The input is walked one step at a time with _nu_tounaccent(). When that call
// hands back a replacement sequence, each code point decoded from it (up to the
// terminating 0) is re-encoded as UTF-8 and appended; otherwise the bytes
// consumed from the input are copied through unchanged.
//
// The result is accumulated directly in a std::string rather than through a
// std::stringstream, which avoids the stream machinery for what is plain byte
// concatenation.
std::string unaccent(const std::string& str)
{
    std::string output;
    // Starting capacity only; the string still grows if replacements are longer.
    output.reserve(str.length());

    char const* const end = str.c_str() + str.length();
    char encoded[5] = { 0 }; // scratch space for one UTF-8 encoded code point
    char const* next = nullptr;

    for (char const* itr = str.c_str(); itr < end; itr = next)
    {
        uint32_t code_point = 0;
        char const* replacement = nullptr;
        next = _nu_tounaccent(itr, end, nu_utf8_read, &code_point, &replacement, nullptr);

        if (replacement == nullptr)
        {
            // No replacement supplied: keep the original bytes for this step.
            output.append(itr, static_cast<std::size_t>(next - itr));
            continue;
        }

        // Decode the replacement sequence until its 0 terminator.
        for (;;)
        {
            replacement = NU_CASEMAP_DECODING_FUNCTION(replacement, &code_point);
            if (code_point == 0) break;
            char const* const written = nu_utf8_write(code_point, encoded);
            output.append(encoded, static_cast<std::size_t>(written - encoded));
        }
    }
    return output;
}
} // namespace
namespace mbgl {
namespace style {
namespace expression {
// Locale-agnostic collator implementation backed by nunicode.
// The locale argument is accepted for interface compatibility but is not stored
// or consulted; resolvedLocale() always reports an empty string.
class Collator::Impl {
public:
    Impl(bool caseSensitive_, bool diacriticSensitive_, optional<std::string> /*locale*/)
        : caseSensitive(caseSensitive_), diacriticSensitive(diacriticSensitive_) {}

    // Two implementations are equal when both sensitivity flags match.
    bool operator==(const Impl& other) const {
        if (caseSensitive != other.caseSensitive) {
            return false;
        }
        return diacriticSensitive == other.diacriticSensitive;
    }

    // Compares two UTF-8 strings and returns the nunicode collation result.
    // When diacritic-insensitive, both operands are passed through unaccent()
    // before being compared.
    int compare(const std::string& lhs, const std::string& rhs) const {
        if (diacriticSensitive) {
            return collate(lhs.c_str(), rhs.c_str());
        }
        const std::string strippedLhs = unaccent(lhs);
        const std::string strippedRhs = unaccent(rhs);
        return collate(strippedLhs.c_str(), strippedRhs.c_str());
    }

    // No locale is ever resolved by this implementation.
    std::string resolvedLocale() const {
        return "";
    }

private:
    // Picks the case-sensitive or case-insensitive nunicode comparison for two
    // NUL-terminated UTF-8 strings.
    int collate(const char* a, const char* b) const {
        if (caseSensitive) {
            return nu_strcoll(a, b, nu_utf8_read, nu_utf8_read);
        }
        return nu_strcasecoll(a, b, nu_utf8_read, nu_utf8_read);
    }

    bool caseSensitive;
    bool diacriticSensitive;
};
// Constructs a Collator by creating the shared Impl that carries the two
// sensitivity flags. The locale is moved into the Impl constructor.
Collator::Collator(bool caseSensitive, bool diacriticSensitive, optional<std::string> locale_)
: impl(std::make_shared<Impl>(caseSensitive, diacriticSensitive, std::move(locale_)))
{}
// Collators compare equal when their underlying implementations compare equal
// (value comparison of the Impl objects, not pointer identity).
bool Collator::operator==(const Collator& other) const {
    const Impl& mine = *impl;
    const Impl& theirs = *other.impl;
    return mine == theirs;
}
int Collator::compare(const std::string& lhs, const std::string& rhs) const {
return impl->compare(lhs, rhs);
}
std::string Collator::resolvedLocale() const {
return impl->resolvedLocale();
}
} // namespace expression
} // namespace style
} // namespace mbgl
|