diff options
| -rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 18 |
| -rw-r--r-- | src/corelib/json/qjsonparser.cpp | 16 |
| -rw-r--r-- | src/corelib/json/qjsonwriter.cpp | 18 |
| -rw-r--r-- | src/corelib/tools/qunicodetables.cpp | 15 |
| -rw-r--r-- | src/corelib/tools/qunicodetables_p.h | 24 |
| -rw-r--r-- | util/unicode/main.cpp | 43 |
6 files changed, 88 insertions, 46 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 9111ac6379..c3d9dbbd31 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -43,24 +43,12 @@  #include "qlist.h"  #include "qendian.h"  #include "qchar.h" +#include <private/qunicodetables_p.h>  QT_BEGIN_NAMESPACE  enum { Endian = 0, Data = 1 }; -static inline bool isUnicodeNonCharacter(uint ucs4) -{ -    // Unicode has a couple of "non-characters" that one can use internally, -    // but are not allowed to be used for text interchange. -    // -    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, -    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and -    // U+FDEF (inclusive) - -    return (ucs4 & 0xfffe) == 0xfffe -            || (ucs4 - 0xfdd0U) < 32; -} -  QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)  {      uchar replacement = '?'; @@ -120,7 +108,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve                  *cursor++ = 0xc0 | ((uchar) (u >> 6));              } else {                  // is it one of the Unicode non-characters? 
-                if (isUnicodeNonCharacter(u)) { +                if (QUnicodeTables::isNonCharacter(u)) {                      *cursor++ = replacement;                      ++ch;                      ++invalid; @@ -196,7 +184,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte                      bool nonCharacter;                      if (!headerdone && uc == 0xfeff) {                          // don't do anything, just skip the BOM -                    } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) { +                    } else if (!(nonCharacter = QUnicodeTables::isNonCharacter(uc)) && QChar::requiresSurrogates(uc) && uc < 0x110000) {                          // surrogate pair                          Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());                          *qch++ = QChar::highSurrogate(uc); diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index a17426580f..8c5693c9be 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -45,6 +45,7 @@  #include <qdebug.h>  #include "qjsonparser_p.h"  #include "qjson_p.h" +#include <private/qunicodetables_p.h>  //#define PARSER_DEBUG  #ifdef PARSER_DEBUG @@ -721,19 +722,6 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *      return true;  } -static inline bool isUnicodeNonCharacter(uint ucs4) -{ -    // Unicode has a couple of "non-characters" that one can use internally, -    // but are not allowed to be used for text interchange. -    // -    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, -    // U+1FFFE, U+1FFFF, etc.) 
as well as the entries between U+FDD0 and -    // U+FDEF (inclusive) - -    return (ucs4 & 0xfffe) == 0xfffe -            || (ucs4 - 0xfdd0U) < 32; -} -  static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)  {      int need; @@ -769,7 +757,7 @@ static inline bool scanUtf8Char(const char *&json, const char *end, uint *result          uc = (uc << 6) | (ch & 0x3f);      } -    if (uc < min_uc || isUnicodeNonCharacter(uc) || +    if (uc < min_uc || QUnicodeTables::isNonCharacter(uc) ||          (uc >= 0xd800 && uc <= 0xdfff) || uc >= 0x110000) {          return false;      } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index 7cdc3f0dba..b086cbdea9 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -41,6 +41,7 @@  #include "qjsonwriter_p.h"  #include "qjson_p.h" +#include <private/qunicodetables_p.h>  QT_BEGIN_NAMESPACE @@ -49,21 +50,6 @@ using namespace QJsonPrivate;  static void objectContentToJson(const QJsonPrivate::Object *o, QByteArray &json, int indent, bool compact);  static void arrayContentToJson(const QJsonPrivate::Array *a, QByteArray &json, int indent, bool compact); -// some code from qutfcodec.cpp, inlined here for performance reasons -// to allow fast escaping of strings -static inline bool isUnicodeNonCharacter(uint ucs4) -{ -    // Unicode has a couple of "non-characters" that one can use internally, -    // but are not allowed to be used for text interchange. -    // -    // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, -    // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and -    // U+FDEF (inclusive) - -    return (ucs4 & 0xfffe) == 0xfffe -            || (ucs4 - 0xfdd0U) < 32; -} -  static inline uchar hexdig(uint u)  {      return (u < 0xa ? 
'0' + u : 'a' + u - 0xa); @@ -154,7 +140,7 @@ static QByteArray escapedString(const QString &s)                  *cursor++ = 0xc0 | ((uchar) (u >> 6));              } else {                  // is it one of the Unicode non-characters? -                if (isUnicodeNonCharacter(u)) { +                if (QUnicodeTables::isNonCharacter(u)) {                      *cursor++ = replacement;                      ++ch;                      continue; diff --git a/src/corelib/tools/qunicodetables.cpp b/src/corelib/tools/qunicodetables.cpp index 04031251e4..9a2a36cd49 100644 --- a/src/corelib/tools/qunicodetables.cpp +++ b/src/corelib/tools/qunicodetables.cpp @@ -4348,6 +4348,21 @@ Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2)      return qGetProp(ucs2);  } +Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4) +{ +    return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak; +} + +Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4) +{ +    return (WordBreak)qGetProp(ucs4)->wordBreak; +} + +Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4) +{ +    return (SentenceBreak)qGetProp(ucs4)->sentenceBreak; +} +  Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)  {      return (LineBreakClass)qGetProp(ucs4)->line_break_class; diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h index 50afebdd9c..15d5415b0b 100644 --- a/src/corelib/tools/qunicodetables_p.h +++ b/src/corelib/tools/qunicodetables_p.h @@ -217,6 +217,18 @@ namespace QUnicodeTables {      }; +    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4); +    inline int graphemeBreakClass(QChar ch) +    { return graphemeBreakClass(ch.unicode()); } + +    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4); +    inline int wordBreakClass(QChar ch) +    { return wordBreakClass(ch.unicode()); } + +    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4); +    inline int 
sentenceBreakClass(QChar ch) +    { return sentenceBreakClass(ch.unicode()); } +      Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);      inline int lineBreakClass(QChar ch)      { return lineBreakClass(ch.unicode()); } @@ -225,6 +237,18 @@ namespace QUnicodeTables {      inline int script(QChar ch)      { return script(ch.unicode()); } + +    inline bool isNonCharacter(uint ucs4) +    { +        // Noncharacter_Code_Point: +        // Unicode has a couple of "non-characters" that one can use internally, +        // but are not allowed to be used for text interchange. +        // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF, +        // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF + +        return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe); +    } +  } // namespace QUnicodeTables  QT_END_NAMESPACE diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index b9245ba387..42360f0628 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -397,6 +397,18 @@ static const char *property_string =      "    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);\n";  static const char *methods = +    "    Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4);\n" +    "    inline int graphemeBreakClass(QChar ch)\n" +    "    { return graphemeBreakClass(ch.unicode()); }\n" +    "\n" +    "    Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4);\n" +    "    inline int wordBreakClass(QChar ch)\n" +    "    { return wordBreakClass(ch.unicode()); }\n" +    "\n" +    "    Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4);\n" +    "    inline int sentenceBreakClass(QChar ch)\n" +    "    { return sentenceBreakClass(ch.unicode()); }\n" +    "\n"      "    Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4);\n"      "    inline int lineBreakClass(QChar ch)\n"      "    { return lineBreakClass(ch.unicode()); 
}\n" @@ -405,6 +417,18 @@ static const char *methods =      "    inline int script(QChar ch)\n"      "    { return script(ch.unicode()); }\n\n"; +static const char *generated_methods = +    "    inline bool isNonCharacter(uint ucs4)\n" +    "    {\n" +    "        // Noncharacter_Code_Point:\n" +    "        // Unicode has a couple of \"non-characters\" that one can use internally,\n" +    "        // but are not allowed to be used for text interchange.\n" +    "        // Those are the last two entries each Unicode Plane (U+FFFE..U+FFFF,\n" +    "        // U+1FFFE..U+1FFFF, etc.) as well as the entries in range U+FDD0..U+FDEF\n" +    "\n" +    "        return ucs4 >= 0xfdd0 && (ucs4 <= 0xfdef || (ucs4 & 0xfffe) == 0xfffe);\n" +    "    }\n\n"; +  static const int SizeOfPropertiesStruct = 20;  struct PropertyFlags { @@ -2275,7 +2299,22 @@ static QByteArray createPropertyInfo()             "    return qGetProp(ucs2);\n"             "}\n\n"; -    out += "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n" +    out += "Q_CORE_EXPORT GraphemeBreak QT_FASTCALL graphemeBreakClass(uint ucs4)\n" +           "{\n" +           "    return (GraphemeBreak)qGetProp(ucs4)->graphemeBreak;\n" +           "}\n" +           "\n" +           "Q_CORE_EXPORT WordBreak QT_FASTCALL wordBreakClass(uint ucs4)\n" +           "{\n" +           "    return (WordBreak)qGetProp(ucs4)->wordBreak;\n" +           "}\n" +           "\n" +           "Q_CORE_EXPORT SentenceBreak QT_FASTCALL sentenceBreakClass(uint ucs4)\n" +           "{\n" +           "    return (SentenceBreak)qGetProp(ucs4)->sentenceBreak;\n" +           "}\n" +           "\n" +           "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(uint ucs4)\n"             "{\n"             "    return (LineBreakClass)qGetProp(ucs4)->line_break_class;\n"             "}\n\n"; @@ -2868,6 +2907,8 @@ int main(int, char **)      f.write(line_break_class_string);      f.write("\n");      f.write(methods); +    
f.write("\n"); +    f.write(generated_methods);      f.write("} // namespace QUnicodeTables\n\n"              "QT_END_NAMESPACE\n\n"              "#endif // QUNICODETABLES_P_H\n");  | 
