{   Unicode "Character" properties handler.

    Copyright (c) 2012 by Inoussa OUEDRAOGO

    The source code is distributed under the Library GNU
    General Public License with the following modification:

        - object files and libraries linked into an application may be
          distributed without source code.

    If you didn't receive a copy of the file COPYING, contact:
          Free Software Foundation
          675 Mass Ave
          Cambridge, MA  02139
          USA

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
unit Character;

interface
{$ifndef VER2_4}
{$mode objfpc}
{$H+}
{$PACKENUM 1}
{$SCOPEDENUMS ON}

uses
  unicodedata;

type
  // Unicode General Category
  // NOTE(review): the implementation casts the Category field of the
  // unicodedata property record straight to this type, so the declaration
  // order presumably has to match the values stored there - confirm before
  // reordering or inserting members.
  TUnicodeCategory = (
    ucUppercaseLetter,             // Lu = Letter, uppercase
    ucLowercaseLetter,             // Ll = Letter, lowercase
    ucTitlecaseLetter,             // Lt = Letter, titlecase
    ucModifierLetter,              // Lm = Letter, modifier
    ucOtherLetter,                 // Lo = Letter, other

    ucNonSpacingMark,              // Mn = Mark, nonspacing
    ucCombiningMark,               // Mc = Mark, spacing combining
    ucEnclosingMark,               // Me = Mark, enclosing

    ucDecimalNumber,               // Nd = Number, decimal digit
    ucLetterNumber,                // Nl = Number, letter
    ucOtherNumber,                 // No = Number, other

    ucConnectPunctuation,          // Pc = Punctuation, connector
    ucDashPunctuation,             // Pd = Punctuation, dash
    ucOpenPunctuation,             // Ps = Punctuation, open
    ucClosePunctuation,            // Pe = Punctuation, close
    ucInitialPunctuation,          // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
    ucFinalPunctuation,            // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
    ucOtherPunctuation,            // Po = Punctuation, other

    ucMathSymbol,                  // Sm = Symbol, math
    ucCurrencySymbol,              // Sc = Symbol, currency
    ucModifierSymbol,              // Sk = Symbol, modifier
    ucOtherSymbol,                 // So = Symbol, other

    ucSpaceSeparator,              // Zs = Separator, space
    ucLineSeparator,               // Zl = Separator, line
    ucParagraphSeparator,          // Zp = Separator, paragraph

    ucControl,                     // Cc = Other, control
    ucFormat,                      // Cf = Other, format
    ucSurrogate,                   // Cs = Other, surrogate
    ucPrivateUse,                  // Co = Other, private use
    ucUnassigned                   // Cn = Other, not assigned (including noncharacters)
  );
  TUnicodeCategorySet = set of TUnicodeCategory;

  // Options accepted by the string overloads of ToLower/ToUpper.
  // coIgnoreInvalidSequence is forwarded as a Boolean flag to the
  // unicodedata case-conversion routines.
  TCharacterOption = (coIgnoreInvalidSequence);
  TCharacterOptions = set of TCharacterOption;

  { TCharacter }

  // Static-only helper class: every member is a class function and the
  // constructor always raises, so the class cannot be instantiated.
  //
  // Conventions shared by the overloads below:
  //  * (AChar) overloads look at a single UTF-16 code unit;
  //  * (AString, AIndex) overloads use a 1-based index, raise
  //    EArgumentOutOfRangeException when the index is outside the string
  //    and, for the category/property queries, combine a surrogate pair
  //    starting at AIndex into one code point before the lookup.
  TCharacter = class sealed
  private
    // True when the code point at AString[AIndex] has exactly ACategory.
    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
    // True when the category of the code point at AString[AIndex] is a member of ACategory.
    class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
  public
    // Always raises ENoConstructException.
    constructor Create;
    // UTF-32 -> UTF-16 (one code unit, or a surrogate pair).
    class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
    // UTF-16 -> UTF-32; ACharLength receives the number of code units consumed (1 or 2).
    class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
    class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
    class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;

    class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
    class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;

    class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
    class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;

    class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
    class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

    class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

    class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

    class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
    class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

    class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
    class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
    class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;

    class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
    class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
    class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
  end;

  // flat functions
  // Each one forwards to the TCharacter class function of the same name.
  function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
  function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
  function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
  function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
  function GetNumericValue(AChar : UnicodeChar) : Double; overload;
  function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
  function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
  function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
  function IsControl(AChar : UnicodeChar) : Boolean; overload;
  function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsDigit(AChar : UnicodeChar) : Boolean; overload;
  function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
  function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
  function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
  function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
  function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsLetter(AChar : UnicodeChar) : Boolean; overload;
  function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
  function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsLower(AChar : UnicodeChar) : Boolean; overload;
  function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsNumber(AChar : UnicodeChar) : Boolean; overload;
  function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
  function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
  function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
  function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsUpper(AChar : UnicodeChar) : Boolean; overload;
  function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
  function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
  function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
  function ToLower(const AString : UnicodeString) : UnicodeString; overload;
  function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
  function ToUpper(const AString : UnicodeString) : UnicodeString; overload;

{$endif VER2_4}

implementation
{$ifndef VER2_4}
uses
  SysUtils,
  RtlConsts;

const
  // Category groups used by the Is* predicates.
  LETTER_CATEGORIES = [
    TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
    TUnicodeCategory.ucTitlecaseLetter, TUnicodeCategory.ucModifierLetter,
    TUnicodeCategory.ucOtherLetter
  ];
  // Note: besides decimal digits this set also contains ucLetterNumber (Nl).
  LETTER_OR_DIGIT_CATEGORIES =
    LETTER_CATEGORIES +
    [TUnicodeCategory.ucDecimalNumber,TUnicodeCategory.ucLetterNumber];
  NUMBER_CATEGORIES =
    [ TUnicodeCategory.ucDecimalNumber, TUnicodeCategory.ucLetterNumber,
      TUnicodeCategory.ucOtherNumber
    ];
  PUNCTUATION_CATEGORIES = [
    TUnicodeCategory.ucConnectPunctuation, TUnicodeCategory.ucDashPunctuation,
    TUnicodeCategory.ucOpenPunctuation, TUnicodeCategory.ucClosePunctuation,
    TUnicodeCategory.ucInitialPunctuation, TUnicodeCategory.ucFinalPunctuation,
    TUnicodeCategory.ucOtherPunctuation
  ];
  SEPARATOR_CATEGORIES =
    [ TUnicodeCategory.ucSpaceSeparator, TUnicodeCategory.ucLineSeparator,
      TUnicodeCategory.ucParagraphSeparator
    ];
  SYMBOL_CATEGORIES =
    [ TUnicodeCategory.ucMathSymbol, TUnicodeCategory.ucCurrencySymbol,
      TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
    ];

{ Flat wrappers : each forwards to the TCharacter class function of the same name. }

function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
begin
  Result := TCharacter.ConvertFromUtf32(AChar);
end;

function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
begin
  Result := TCharacter.ConvertToUtf32(AString, AIndex);
end;

function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
begin
  Result := TCharacter.ConvertToUtf32(AString, AIndex, ACharLength);
end;

function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
begin
  Result := TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate);
end;

function GetNumericValue(AChar: UnicodeChar): Double;
begin
  Result := TCharacter.GetNumericValue(AChar);
end;

function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
begin
  Result := TCharacter.GetNumericValue(AString, AIndex);
end;

function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
begin
  Result := TCharacter.GetUnicodeCategory(AChar);
end;

function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
begin
  Result := TCharacter.GetUnicodeCategory(AString, AIndex);
end;

function IsControl(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsControl(AChar);
end;

function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsControl(AString, AIndex);
end;

function IsDigit(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsDigit(AChar);
end;

function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsDigit(AString, AIndex);
end;

function IsSurrogate(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsSurrogate(AChar);
end;

function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsSurrogate(AString, AIndex);
end;

function IsHighSurrogate(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsHighSurrogate(AChar);
end;

function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsHighSurrogate(AString, AIndex);
end;

function IsLowSurrogate(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsLowSurrogate(AChar);
end;

function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsLowSurrogate(AString, AIndex);
end;

function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate);
end;

function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsSurrogatePair(AString, AIndex);
end;

function IsLetter(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsLetter(AChar);
end;

function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsLetter(AString, AIndex);
end;

function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsLetterOrDigit(AChar);
end;

function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsLetterOrDigit(AString, AIndex);
end;

function IsLower(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsLower(AChar);
end;

function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsLower(AString, AIndex);
end;

function IsNumber(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsNumber(AChar);
end;

function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsNumber(AString, AIndex);
end;

function IsPunctuation(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsPunctuation(AChar);
end;

function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsPunctuation(AString, AIndex);
end;

function IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsSeparator(AChar);
end;

function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsSeparator(AString, AIndex);
end;

function IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsSymbol(AChar);
end;

function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsSymbol(AString, AIndex);
end;

function IsUpper(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsUpper(AChar);
end;

function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsUpper(AString, AIndex);
end;

function IsWhiteSpace(AChar: UnicodeChar): Boolean;
begin
  Result := TCharacter.IsWhiteSpace(AChar);
end;

function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Result := TCharacter.IsWhiteSpace(AString, AIndex);
end;

function ToLower(AChar: UnicodeChar): UnicodeChar;
begin
  Result := TCharacter.ToLower(AChar);
end;

function ToLower(const AString: UnicodeString): UnicodeString;
begin
  Result := TCharacter.ToLower(AString);
end;

function ToUpper(AChar: UnicodeChar): UnicodeChar;
begin
  Result := TCharacter.ToUpper(AChar);
end;

function ToUpper(const AString: UnicodeString): UnicodeString;
begin
  Result := TCharacter.ToUpper(AString);
end;

{ Private helper : property record of the code point that starts at
  AString[AIndex] (1-based).

  Shared by TestCategory, GetNumericValue and GetUnicodeCategory, which
  previously each carried their own copy of this sequence.

  Raises
    EArgumentOutOfRangeException : AIndex is outside 1..Length(AString).
    EArgumentException           : the code unit at AIndex has the surrogate
                                   category and TCharacter.IsSurrogatePair
                                   reports that no pair starts there.
  Any exception raised by TCharacter.IsSurrogatePair propagates unchanged. }
function PropsOfCodePointAt(
  const AString : UnicodeString;
        AIndex  : Integer
) : PUC_Prop;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := GetProps(Word(AString[AIndex]));
  if (TUnicodeCategory(Result^.Category) = TUnicodeCategory.ucSurrogate) then begin
    // Qualified on purpose: the unqualified name would bind to the flat
    // wrapper here, outside of the class scope.
    if not TCharacter.IsSurrogatePair(AString,AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    // Valid pair : look the properties up for the combined code point.
    Result := GetProps(AString[AIndex],AString[AIndex+1]);
  end;
end;

{ TCharacter }

{ True when the code point at AString[AIndex] has exactly ACategory.
  Index and surrogate validation is done by PropsOfCodePointAt. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategory
) : Boolean;
begin
  Result := (TUnicodeCategory(PropsOfCodePointAt(AString,AIndex)^.Category) = ACategory);
end;

{ True when the category of the code point at AString[AIndex] belongs to
  the set ACategory. Index and surrogate validation is done by
  PropsOfCodePointAt. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategorySet
) : Boolean;
begin
  Result := (TUnicodeCategory(PropsOfCodePointAt(AString,AIndex)^.Category) in ACategory);
end;

{ TCharacter only exposes class functions; constructing it is an error.
  Always raises ENoConstructException. }
constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [ClassName]);
end;

{ Encodes the UTF-32 value AChar as UTF-16.
  Values below UCS4_HALF_BASE yield a one-unit string, larger values a
  surrogate pair.
  Raises EArgumentOutOfRangeException when AChar is itself a surrogate code
  unit or is greater than MAX_LEGAL_UTF32. }
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
begin
  if AChar < UCS4_HALF_BASE then
  begin
    if IsSurrogate(UnicodeChar(AChar)) then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    Result := UnicodeChar(AChar);
  end
  else
  begin
    if AChar > MAX_LEGAL_UTF32 then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    SetLength(Result, 2);
    // Split the offset above UCS4_HALF_BASE : the bits above the low ten
    // go into the high surrogate, the bits selected by UCS4_HALF_MASK into
    // the low surrogate.
    AChar := AChar - UCS4_HALF_BASE;
    Result[1] := UnicodeChar((AChar shr 10) + HIGH_SURROGATE_BEGIN);
    Result[2] := UnicodeChar((AChar and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
  end;
end;

{ Same as the three-argument overload, discarding the consumed length. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
var
  ignoredLength : Integer;
begin
  Result := ConvertToUtf32(AString, AIndex, ignoredLength);
end;

{ Decodes the UTF-32 value that starts at AString[AIndex] (1-based).
  ACharLength receives 2 when a high surrogate at AIndex was combined with
  the following code unit, 1 otherwise. A code unit that is not a high
  surrogate is returned as is.
  Raises
    EArgumentOutOfRangeException : AIndex is outside the string, or the unit
                                   following a high surrogate is not a low
                                   surrogate (raised by the pair overload).
    EArgumentException           : a high surrogate is the last code unit. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := Word(AString[AIndex]);
  if IsHighSurrogate(UnicodeChar(Result)) then
  begin
    if Length(AString) < Succ(AIndex) then
      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
    Result := ConvertToUtf32(UnicodeChar(Result), AString[Succ(AIndex)]);
    ACharLength := 2;
  end
  else
    ACharLength := 1;
end;

{ Combines a surrogate pair into its UTF-32 value.
  Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
  surrogate or ALowSurrogate is not a low surrogate. }
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
begin
  if not IsHighSurrogate(AHighSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
  if not IsLowSurrogate(ALowSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
  Result := ToUCS4(AHighSurrogate, ALowSurrogate);
end;

{ NumericValue field of the property record of the code unit AChar. }
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
begin
  Result := GetProps(Word(AChar))^.NumericValue;
end;

{ NumericValue field for the code point at AString[AIndex]; a surrogate
  pair starting at AIndex is looked up as one code point.
  Raises as described for PropsOfCodePointAt. }
class function TCharacter.GetNumericValue(
  const AString : UnicodeString;
        AIndex  : Integer
) : Double;
begin
  Result := PropsOfCodePointAt(AString,AIndex)^.NumericValue;
end;

{ General category of the code unit AChar. }
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
begin
  Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
end;

{ General category of the code point at AString[AIndex]; a surrogate pair
  starting at AIndex is looked up as one code point.
  Raises as described for PropsOfCodePointAt. }
class function TCharacter.GetUnicodeCategory(
  const AString : UnicodeString;
        AIndex  : Integer
) : TUnicodeCategory;
begin
  Result := TUnicodeCategory(PropsOfCodePointAt(AString,AIndex)^.Category);
end;

{ True when the category of AChar is ucControl. }
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucControl);
end;

{ True when the code point at AString[AIndex] has category ucControl. }
class function TCharacter.IsControl(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucControl);
end;

{ True when the category of AChar is ucDecimalNumber. }
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucDecimalNumber);
end;

{ True when the code point at AString[AIndex] has category ucDecimalNumber. }
class function TCharacter.IsDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucDecimalNumber);
end;

{ True when the category of AChar is ucSurrogate. }
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate);
end;

{ True when the code unit AString[AIndex] is a surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside the string. }
class function TCharacter.IsSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsSurrogate(AString[AIndex]);
end;

{ True when AChar has the surrogate category and lies in
  HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END. }
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
            (Word(AChar) >= HIGH_SURROGATE_BEGIN) and
            (Word(AChar) <= HIGH_SURROGATE_END);
end;

{ True when the code unit AString[AIndex] is a high surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside the string. }
class function TCharacter.IsHighSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsHighSurrogate(AString[AIndex]);
end;

{ True when AChar has the surrogate category and lies in
  LOW_SURROGATE_BEGIN..LOW_SURROGATE_END. }
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
            (Word(AChar) >= LOW_SURROGATE_BEGIN) and
            (Word(AChar) <= LOW_SURROGATE_END);
end;
{ True when the code unit AString[AIndex] (1-based) is a low surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside the string. }
class function TCharacter.IsLowSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsLowSurrogate(AString[AIndex]);
end;

{ True when the two code units form a surrogate pair, as decided by
  unicodedata's UnicodeIsSurrogatePair. }
class function TCharacter.IsSurrogatePair(
  const AHighSurrogate,
        ALowSurrogate   : UnicodeChar
) : Boolean;
begin
  Result := UnicodeIsSurrogatePair(AHighSurrogate,ALowSurrogate);
end;

{ True when a surrogate pair starts at AString[AIndex] (1-based).

  Returns False when the unit at AIndex is not a high surrogate, and also
  when a high surrogate is the very last code unit of the string: AIndex is
  a valid position in that case, so the question has a definite answer
  ("no pair here") and must not be reported as an index error. Callers in
  this unit that test "not IsSurrogatePair(...)" then raise their own
  invalid-sequence error for such input.

  Raises EArgumentOutOfRangeException only when AIndex itself is outside
  1..Length(AString). }
class function TCharacter.IsSurrogatePair(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  if not IsHighSurrogate(AString[AIndex]) then begin
    Result := False;
    exit;
  end;
  // Lone high surrogate at the end of the string : nothing to pair it with.
  if (AIndex >= Length(AString)) then begin
    Result := False;
    exit;
  end;
  Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
end;

{ True when the category of AChar is one of LETTER_CATEGORIES. }
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in LETTER_CATEGORIES. }
class function TCharacter.IsLetter(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,LETTER_CATEGORIES);
end;

{ True when the category of AChar is one of LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_OR_DIGIT_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,LETTER_OR_DIGIT_CATEGORIES);
end;

{ True when the category of AChar is ucLowercaseLetter. }
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucLowercaseLetter);
end;

{ True when the code point at AString[AIndex] has category ucLowercaseLetter. }
class function TCharacter.IsLower(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucLowercaseLetter);
end;

{ True when the category of AChar is one of NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in NUMBER_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,NUMBER_CATEGORIES);
end;

{ True when the category of AChar is one of PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in PUNCTUATION_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,PUNCTUATION_CATEGORIES);
end;

{ True when the category of AChar is one of SEPARATOR_CATEGORIES. }
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SEPARATOR_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in SEPARATOR_CATEGORIES. }
class function TCharacter.IsSeparator(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,SEPARATOR_CATEGORIES);
end;

{ True when the category of AChar is one of SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SYMBOL_CATEGORIES);
end;

{ True when the code point at AString[AIndex] is in SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,SYMBOL_CATEGORIES);
end;

{ True when the category of AChar is ucUppercaseLetter. }
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
begin
  Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucUppercaseLetter);
end;

{ True when the code point at AString[AIndex] has category ucUppercaseLetter. }
class function TCharacter.IsUpper(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString,AIndex,TUnicodeCategory.ucUppercaseLetter);
end;

{ WhiteSpace flag of the property record of the code unit AChar. }
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
begin
  Result := GetProps(Word(AChar))^.WhiteSpace;
end;

{ WhiteSpace flag of the code point at AString[AIndex] (1-based); a
  surrogate pair starting at AIndex is looked up as one code point.
  Raises
    EArgumentOutOfRangeException : AIndex is outside the string.
    EArgumentException           : the unit at AIndex is a surrogate that
                                   does not start a valid pair (lone low
                                   surrogate, unpaired or trailing high
                                   surrogate). }
class function TCharacter.IsWhiteSpace(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
var
  pu : PUC_Prop;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  pu := GetProps(Word(AString[AIndex]));
  if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
    if not IsSurrogatePair(AString,AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    pu := GetProps(AString[AIndex],AString[AIndex+1]);
  end;
  Result := pu^.WhiteSpace;
end;

{ Simple lower-case mapping of the code unit AChar.
  A stored mapping of 0 is treated as "no mapping" and AChar is returned
  unchanged. }
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
begin
  Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  if (Result = UnicodeChar(0)) then
    Result := AChar;
end;

{ Lower-cases AString with an empty option set. }
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToLower(AString,[]);
end;

{ Lower-cases AString through unicodedata's UnicodeToLower, passing
  whether coIgnoreInvalidSequence is in AOptions as its Boolean argument.
  Raises EArgumentException when UnicodeToLower returns a non-zero code. }
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
begin
  if (UnicodeToLower(
       AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
      ) <> 0
     )
  then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;

{ Simple upper-case mapping of the code unit AChar.
  A stored mapping of 0 is treated as "no mapping" and AChar is returned
  unchanged. }
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
begin
  Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  if (Result = UnicodeChar(0)) then
    Result := AChar;
end;

{ Upper-cases AString with an empty option set. }
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToUpper(AString,[]);
end;

{ Upper-cases AString through unicodedata's UnicodeToUpper, passing
  whether coIgnoreInvalidSequence is in AOptions as its Boolean argument.
  Raises EArgumentException when UnicodeToUpper returns a non-zero code. }
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
begin
  if (UnicodeToUpper(
       AString,(TCharacterOption.coIgnoreInvalidSequence in AOptions),Result
      ) <> 0
     )
  then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
{$endif VER2_4}

end.