summaryrefslogtreecommitdiff
path: root/Source/WebCore/html/parser/HTMLEntityParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/html/parser/HTMLEntityParser.cpp')
-rw-r--r--Source/WebCore/html/parser/HTMLEntityParser.cpp40
1 files changed, 13 insertions, 27 deletions
diff --git a/Source/WebCore/html/parser/HTMLEntityParser.cpp b/Source/WebCore/html/parser/HTMLEntityParser.cpp
index 8d2177fd2..98503d9d1 100644
--- a/Source/WebCore/html/parser/HTMLEntityParser.cpp
+++ b/Source/WebCore/html/parser/HTMLEntityParser.cpp
@@ -32,8 +32,7 @@
#include "HTMLEntitySearch.h"
#include "HTMLEntityTable.h"
#include <wtf/text/StringBuilder.h>
-
-using namespace WTF;
+#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
@@ -44,43 +43,30 @@ static const UChar windowsLatin1ExtensionArray[32] = {
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
};
-static inline bool isAlphaNumeric(UChar cc)
-{
- return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z');
-}
-
class HTMLEntityParser {
public:
- inline static UChar adjustEntity(UChar32 value)
+ static UChar32 legalEntityFor(UChar32 value)
{
- if ((value & ~0x1F) != 0x0080)
+ if (value <= 0 || value > UCHAR_MAX_VALUE || U_IS_SURROGATE(value))
+ return replacementCharacter;
+ if ((value & ~0x1F) != 0x80)
return value;
return windowsLatin1ExtensionArray[value - 0x80];
}
- inline static UChar32 legalEntityFor(UChar32 value)
- {
- // FIXME: A number of specific entity values generate parse errors.
- if (!value || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
- return 0xFFFD;
- if (U_IS_BMP(value))
- return adjustEntity(value);
- return value;
- }
-
- inline static bool acceptMalformed() { return true; }
+ static bool acceptMalformed() { return true; }
- inline static bool consumeNamedEntity(SegmentedString& source, StringBuilder& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter, UChar& cc)
+ static bool consumeNamedEntity(SegmentedString& source, StringBuilder& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter, UChar& cc)
{
StringBuilder consumedCharacters;
HTMLEntitySearch entitySearch;
while (!source.isEmpty()) {
- cc = source.currentChar();
+ cc = source.currentCharacter();
entitySearch.advance(cc);
if (!entitySearch.isEntityPrefix())
break;
consumedCharacters.append(cc);
- source.advanceAndASSERT(cc);
+ source.advancePastNonNewline();
}
notEnoughCharacters = source.isEmpty();
if (notEnoughCharacters) {
@@ -102,17 +88,17 @@ public:
const int length = entitySearch.mostRecentMatch()->length;
const LChar* reference = entitySearch.mostRecentMatch()->entity;
for (int i = 0; i < length; ++i) {
- cc = source.currentChar();
+ cc = source.currentCharacter();
ASSERT_UNUSED(reference, cc == *reference++);
consumedCharacters.append(cc);
- source.advanceAndASSERT(cc);
+ source.advancePastNonNewline();
ASSERT(!source.isEmpty());
}
- cc = source.currentChar();
+ cc = source.currentCharacter();
}
if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
|| !additionalAllowedCharacter
- || !(isAlphaNumeric(cc) || cc == '=')) {
+ || !(isASCIIAlphanumeric(cc) || cc == '=')) {
decodedEntity.append(entitySearch.mostRecentMatch()->firstValue);
if (entitySearch.mostRecentMatch()->secondValue)
decodedEntity.append(entitySearch.mostRecentMatch()->secondValue);