summaryrefslogtreecommitdiff
path: root/Source/WTF/wtf/text/TextBreakIterator.h
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WTF/wtf/text/TextBreakIterator.h
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WTF/wtf/text/TextBreakIterator.h')
-rw-r--r--Source/WTF/wtf/text/TextBreakIterator.h191
1 files changed, 191 insertions, 0 deletions
diff --git a/Source/WTF/wtf/text/TextBreakIterator.h b/Source/WTF/wtf/text/TextBreakIterator.h
new file mode 100644
index 000000000..2bb5f9ca4
--- /dev/null
+++ b/Source/WTF/wtf/text/TextBreakIterator.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#pragma once
+
+#include <wtf/text/StringView.h>
+
+namespace WTF {
+
+// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
+
+enum class LineBreakIteratorMode { Default, Loose, Normal, Strict };
+
+// This is similar to character break iterator in most cases, but is subject to
+// platform UI conventions. One notable example where this can be different
+// from character break iterator is Thai prepend characters, see bug 24342.
+// Use this for insertion point and selection manipulations.
+WTF_EXPORT_PRIVATE UBreakIterator* cursorMovementIterator(StringView);
+
+WTF_EXPORT_PRIVATE UBreakIterator* wordBreakIterator(StringView);
+WTF_EXPORT_PRIVATE UBreakIterator* sentenceBreakIterator(StringView);
+
+WTF_EXPORT_PRIVATE UBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength, LineBreakIteratorMode);
+WTF_EXPORT_PRIVATE void releaseLineBreakIterator(UBreakIterator*);
+UBreakIterator* openLineBreakIterator(const AtomicString& locale);
+void closeLineBreakIterator(UBreakIterator*&);
+
+WTF_EXPORT_PRIVATE bool isWordTextBreak(UBreakIterator*);
+
+class LazyLineBreakIterator {
+public:
+ LazyLineBreakIterator()
+ {
+ resetPriorContext();
+ }
+
+ explicit LazyLineBreakIterator(StringView stringView, const AtomicString& locale = AtomicString(), LineBreakIteratorMode mode = LineBreakIteratorMode::Default)
+ : m_stringView(stringView)
+ , m_locale(locale)
+ , m_mode(mode)
+ {
+ resetPriorContext();
+ }
+
+ ~LazyLineBreakIterator()
+ {
+ if (m_iterator)
+ releaseLineBreakIterator(m_iterator);
+ }
+
+ StringView stringView() const { return m_stringView; }
+ LineBreakIteratorMode mode() const { return m_mode; }
+
+ UChar lastCharacter() const
+ {
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ return m_priorContext[1];
+ }
+
+ UChar secondToLastCharacter() const
+ {
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ return m_priorContext[0];
+ }
+
+ void setPriorContext(UChar last, UChar secondToLast)
+ {
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ m_priorContext[0] = secondToLast;
+ m_priorContext[1] = last;
+ }
+
+ void updatePriorContext(UChar last)
+ {
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ m_priorContext[0] = m_priorContext[1];
+ m_priorContext[1] = last;
+ }
+
+ void resetPriorContext()
+ {
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ m_priorContext[0] = 0;
+ m_priorContext[1] = 0;
+ }
+
+ unsigned priorContextLength() const
+ {
+ unsigned priorContextLength = 0;
+ static_assert(WTF_ARRAY_LENGTH(m_priorContext) == 2, "UBreakIterator unexpected prior context length");
+ if (m_priorContext[1]) {
+ ++priorContextLength;
+ if (m_priorContext[0])
+ ++priorContextLength;
+ }
+ return priorContextLength;
+ }
+
+ // Obtain text break iterator, possibly previously cached, where this iterator is (or has been)
+ // initialized to use the previously stored string as the primary breaking context and using
+ // previously stored prior context if non-empty.
+ UBreakIterator* get(unsigned priorContextLength)
+ {
+ ASSERT(priorContextLength <= priorContextCapacity);
+ const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0;
+ if (!m_iterator) {
+ m_iterator = acquireLineBreakIterator(m_stringView, m_locale, priorContext, priorContextLength, m_mode);
+ m_cachedPriorContext = priorContext;
+ m_cachedPriorContextLength = priorContextLength;
+ } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) {
+ resetStringAndReleaseIterator(m_stringView, m_locale, m_mode);
+ return this->get(priorContextLength);
+ }
+ return m_iterator;
+ }
+
+ void resetStringAndReleaseIterator(StringView stringView, const AtomicString& locale, LineBreakIteratorMode mode)
+ {
+ if (m_iterator)
+ releaseLineBreakIterator(m_iterator);
+ m_stringView = stringView;
+ m_locale = locale;
+ m_iterator = nullptr;
+ m_cachedPriorContext = nullptr;
+ m_mode = mode;
+ m_cachedPriorContextLength = 0;
+ }
+
+private:
+ static constexpr unsigned priorContextCapacity = 2;
+ StringView m_stringView;
+ AtomicString m_locale;
+ UBreakIterator* m_iterator { nullptr };
+ const UChar* m_cachedPriorContext { nullptr };
+ LineBreakIteratorMode m_mode { LineBreakIteratorMode::Default };
+ unsigned m_cachedPriorContextLength { 0 };
+ UChar m_priorContext[priorContextCapacity];
+};
+
+// Iterates over "extended grapheme clusters", as defined in UAX #29.
+// Note that platform implementations may be less sophisticated - e.g. ICU prior to
+// version 4.0 only supports "legacy grapheme clusters".
+// Use this for general text processing, e.g. string truncation.
+
+class NonSharedCharacterBreakIterator {
+ WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator);
+public:
+ WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView);
+ WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator();
+
+ NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&);
+
+ operator UBreakIterator*() const { return m_iterator; }
+
+private:
+ UBreakIterator* m_iterator;
+};
+
+// Counts the number of grapheme clusters. A surrogate pair or a sequence
+// of a non-combining character and following combining characters is
+// counted as 1 grapheme cluster.
+WTF_EXPORT_PRIVATE unsigned numGraphemeClusters(StringView);
+
+// Returns the number of characters which will be less than or equal to
+// the specified grapheme cluster length.
+WTF_EXPORT_PRIVATE unsigned numCharactersInGraphemeClusters(StringView, unsigned);
+
+}
+
+using WTF::LazyLineBreakIterator;
+using WTF::LineBreakIteratorMode;
+using WTF::NonSharedCharacterBreakIterator;
+using WTF::isWordTextBreak;