// Source path: root/chromium/third_party/libaddressinput/chromium/canonicalize_string.cc
// Blob: d1fc1ce10aade45be456556d7bc00fd08740acca
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "third_party/libaddressinput/src/cpp/src/util/canonicalize_string.h"

#include <stdint.h>

#include "base/logging.h"
#include "base/macros.h"
#include "third_party/icu/source/common/unicode/errorcode.h"
#include "third_party/icu/source/common/unicode/locid.h"
#include "third_party/icu/source/common/unicode/unistr.h"
#include "third_party/icu/source/common/unicode/utypes.h"
#include "third_party/icu/source/i18n/unicode/coll.h"
#include "third_party/libaddressinput/src/cpp/include/libaddressinput/util/scoped_ptr.h"

namespace i18n {
namespace addressinput {

namespace {

class ChromeStringCanonicalizer : public StringCanonicalizer {
 public:
  ChromeStringCanonicalizer()
      : error_code_(U_ZERO_ERROR),
        collator_(
            icu::Collator::createInstance(
                icu::Locale::getRoot(), error_code_)) {
    collator_->setStrength(icu::Collator::PRIMARY);
    DCHECK(U_SUCCESS(error_code_));
  }

  virtual ~ChromeStringCanonicalizer() {}

  // StringCanonicalizer implementation.
  virtual std::string CanonicalizeString(const std::string& original) {
    // Returns a canonical version of the string that can be used for comparing
    // strings regardless of diacritics and capitalization.
    //    CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
    //    CanonicalizeString("Texas") == CanonicalizeString("teXas");
    //    CanonicalizeString("Texas") != CanonicalizeString("California");
    //
    // The output is not human-readable.
    //    CanonicalizeString("Texas") != "Texas";
    icu::UnicodeString icu_str(
        original.c_str(), static_cast<int32_t>(original.length()));
    int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
    scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
    DCHECK(buffer.get());
    int32_t filled_size =
        collator_->getSortKey(icu_str, buffer.get(), buffer_size);
    DCHECK_EQ(buffer_size, filled_size);
    return std::string(reinterpret_cast<const char*>(buffer.get()));
  }

 private:
  UErrorCode error_code_;
  scoped_ptr<icu::Collator> collator_;

  DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer);
};

}  // namespace

// static
// Factory for the Chromium build: hands back a newly allocated
// ChromeStringCanonicalizer, owned by the returned scoped_ptr.
scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
  return scoped_ptr<StringCanonicalizer>(
      new ChromeStringCanonicalizer());
}

}  // namespace addressinput
}  // namespace i18n