diff options
Diffstat (limited to 'src/zope/i18n/locales/data/transforms/Greek-Latin.xml')
-rw-r--r-- | src/zope/i18n/locales/data/transforms/Greek-Latin.xml | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/src/zope/i18n/locales/data/transforms/Greek-Latin.xml b/src/zope/i18n/locales/data/transforms/Greek-Latin.xml new file mode 100644 index 0000000..5b59d14 --- /dev/null +++ b/src/zope/i18n/locales/data/transforms/Greek-Latin.xml @@ -0,0 +1,257 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd"> +<supplementalData> + <transforms> + <transform source="Greek" target="Latin" direction="both"> + <comment># Rules are predicated on running NFD first, and NFC afterwards</comment> + <comment># :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;</comment> + <comment># MINIMAL FILTER GENERATED FOR: Greek-Latin</comment> + <tRule>:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;</tRule> + <tRule>:: NFD (NFC) ;</tRule> + <comment># TEST CASES</comment> + <comment># Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος</comment> + <comment># ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ</comment> + <comment># ᾳ ῃ ῳ ὃ ὄ</comment> + <comment># ὠς ὡς ὢς ὣς</comment> + <comment># Ὠς Ὡς Ὢς Ὣς</comment> + <comment># ὨΣ ὩΣ ὪΣ ὫΣ</comment> + <comment># Ạ, ạ, Ẹ, ẹ, Ọ, ọ</comment> + <comment># Useful variables</comment> + <tRule>$lower = [[:latin:][:greek:] & [:Ll:]];</tRule> + <tRule>$glower = [[:greek:] & [:Ll:]];</tRule> + <tRule>$upper = [[:latin:][:greek:] & [:Lu:]] ;</tRule> + <tRule>$accent = [:M:] ;</tRule> + <comment># NOTE: restrict to just the Greek & Latin accents that we care about</comment> + <comment># TODO: broaden out once interation is fixed</comment> + <tRule>$accentMinus = [ [̀-ͅ] & [:M:] - [̸]] ;</tRule> + <tRule>$macron = ̄ ;</tRule> + <tRule>$ddot = ̈ ;</tRule> + <tRule>$ddotmac = [$ddot$macron];</tRule> + <tRule>$lcgvowel = [αεηιουω] ;</tRule> + <tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule> + <tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule> + <tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule> + <tRule>$evowel = [aeiouyAEIOUY];</tRule> + <tRule>$evowel2 = [iuyIUY];</tRule> + <tRule>$vowel = [ $evowel $gvowel] ;</tRule> + <tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule> + <tRule>$egammaLike = [GKXCgkxc] ;</tRule> + <tRule>$smooth = ̓ ;</tRule> + <tRule>$rough = ̔ ;</tRule> + <tRule>$iotasub = ͅ ;</tRule> + <tRule>$evowel_i = [$evowel-[iI]] ;</tRule> + <tRule>$evowel2_i = [uyUY];</tRule> + <tRule>$underbar = ̱;</tRule> + <tRule>$afterLetter = [:L:] [[:M:]\']* ;</tRule> + <tRule>$beforeLetter = [[:M:]\']* [:L:] ;</tRule> + <tRule>$beforeLower = $accent * $lower ;</tRule> + <tRule>$notLetter = [^[:L:][:M:]] ;</tRule> + <tRule>$under = ̱;</tRule> + <comment># Fix punctuation</comment> + <comment># preserve original</comment> + <tRule>\: ↔ \: $under ;</tRule> + <tRule>\? ↔ \? $under ;</tRule> + <tRule>\; ↔ \? ;</tRule> + <tRule>· ↔ \: ;</tRule> + <comment># CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve</comment> + <tRule>͂ ↔ ̂ ;</tRule> + <comment># IOTA: convert iota subscript to iota</comment> + <comment># first make previous alpha long!</comment> + <tRule>$accent_minus = [[$accent]-[$iotasub$macron]];</tRule> + <tRule>Α } $accent_minus * $iotasub → | Α $macron ;</tRule> + <tRule>α } $accent_minus * $iotasub → | α $macron ;</tRule> + <comment># now convert to uppercase if after uppercase, ow to lowercase</comment> + <tRule>$upper $accent * { $iotasub → I ;</tRule> + <tRule>$iotasub → i ;</tRule> + <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;</tRule> + <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;</tRule> + <comment># BREATHING</comment> + <comment># Convert rough breathing to h, and move before letters.</comment> + <comment># Make A ` x = → H a x</comment> + <tRule>Α ($macron?) $rough } $beforeLower → H | α $1;</tRule> + <tRule>Ε $rough } $beforeLower → H | ε;</tRule> + <tRule>Η $rough } $beforeLower → H | η ;</tRule> + <tRule>Ι ($ddot?) $rough } $beforeLower → H | ι $1;</tRule> + <tRule>Ο $rough } $beforeLower → H | ο ;</tRule> + <tRule>Υ $rough } $beforeLower → H | υ ;</tRule> + <tRule>Ω ($ddot?) $rough } $beforeLower → H | ω $1;</tRule> + <comment># Make A x ` = → H a x</comment> + <tRule>Α ($glower $macron?) $rough → H | α $1 ;</tRule> + <tRule>Ε ($glower) $rough → H | ε $1 ;</tRule> + <tRule>Η ($glower) $rough → H | η $1 ;</tRule> + <tRule>Ι ($glower $ddot?) $rough → H | ι $1 ;</tRule> + <tRule>Ο ($glower) $rough → H | ο $1 ;</tRule> + <tRule>Υ ($glower) $rough → H | υ $1 ;</tRule> + <tRule>Ω ($glower $ddot?) $rough → H | ω $1 ;</tRule> + <comment>#Otherwise, make x ` into h x and X ` into H X</comment> + <tRule>($lcgvowel + $ddotmac? ) $rough → h | $1 ;</tRule> + <tRule>($gvowel + $ddotmac? ) $rough → H | $1 ;</tRule> + <comment># Go backwards with H</comment> + <tRule>| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;</tRule> + <tRule>| $1 $rough ← h ($evowel $macron? $ddot?) ;</tRule> + <tRule>| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;</tRule> + <tRule>| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;</tRule> + <comment># titlecase, have to fix individually</comment> + <comment># in the future, we should add &uppercase() to make this easier</comment> + <tRule>| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;</tRule> + <tRule>| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;</tRule> + <tRule>| A $1 $rough ← H a ($macron? $ddot? ) ;</tRule> + <tRule>| E $1 $rough ← H e ($macron? $ddot? ) ;</tRule> + <tRule>| I $1 $rough ← H i ($macron? $ddot? ) ;</tRule> + <tRule>| O $1 $rough ← H o ($macron? $ddot? ) ;</tRule> + <tRule>| U $1 $rough ← H u ($macron? $ddot? ) ;</tRule> + <tRule>| Y $1 $rough ← H y ($macron? $ddot? ) ;</tRule> + <comment># Now do smooth</comment> + <comment>#delete smooth breathing for Latin</comment> + <tRule>$smooth → ;</tRule> + <comment># insert in Greek</comment> + <comment># the assumption is that all Marks are on letters.</comment> + <tRule>| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;</tRule> + <tRule>| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;</tRule> + <tRule>| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;</tRule> + <comment># TODO: preserve smooth/rough breathing if not</comment> + <comment># on initial vowel sequence</comment> + <comment># need to have these up here so the rules don't mask</comment> + <comment># remove now superfluous macron when returning</comment> + <tRule>Α ← A $macron ;</tRule> + <tRule>α ← a $macron ;</tRule> + <tRule>η ↔ e $macron ;</tRule> + <tRule>Η ↔ E $macron ;</tRule> + <tRule>φ ↔ ph ;</tRule> + <tRule>Ψ } $beforeLower ↔ Ps ;</tRule> + <tRule>Ψ ↔ PS ;</tRule> + <tRule>Φ } $beforeLower ↔ Ph ;</tRule> + <tRule>Φ ↔ PH ;</tRule> + <tRule>ψ ↔ ps ;</tRule> + <tRule>ω ↔ o $macron ;</tRule> + <tRule>Ω ↔ O $macron;</tRule> + <comment># NORMAL</comment> + <tRule>α ↔ a ;</tRule> + <tRule>Α ↔ A ;</tRule> + <tRule>β ↔ b ;</tRule> + <tRule>Β ↔ B ;</tRule> + <tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule> + <tRule>γ ↔ g ;</tRule> + <tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule> + <tRule>Γ ↔ G ;</tRule> + <tRule>δ ↔ d ;</tRule> + <tRule>Δ ↔ D ;</tRule> + <tRule>ε ↔ e ;</tRule> + <tRule>Ε ↔ E ;</tRule> + <tRule>ζ ↔ z ;</tRule> + <tRule>Ζ ↔ Z ;</tRule> + <tRule>θ ↔ th ;</tRule> + <tRule>Θ } $beforeLower ↔ Th ;</tRule> + <tRule>Θ ↔ TH ;</tRule> + <tRule>ι ↔ i ;</tRule> + <tRule>Ι ↔ I ;</tRule> + <tRule>κ ↔ k ;</tRule> + <tRule>Κ ↔ K ;</tRule> + <tRule>λ ↔ l ;</tRule> + <tRule>Λ ↔ L ;</tRule> + <tRule>μ ↔ m ;</tRule> + <tRule>Μ ↔ M ;</tRule> + <tRule>ν } $gammaLike → n\' ;</tRule> + <tRule>ν ↔ n ;</tRule> + <tRule>Ν } $gammaLike ↔ N\' ;</tRule> + <tRule>Ν ↔ N ;</tRule> + <tRule>ξ ↔ x ;</tRule> + <tRule>Ξ ↔ X ;</tRule> + <tRule>ο ↔ o ;</tRule> + <tRule>Ο ↔ O ;</tRule> + <tRule>π ↔ p ;</tRule> + <tRule>Π ↔ P ;</tRule> + <tRule>ρ $rough ↔ rh;</tRule> + <tRule>Ρ $rough } $beforeLower ↔ Rh ;</tRule> + <tRule>Ρ $rough ↔ RH ;</tRule> + <tRule>ρ ↔ r ;</tRule> + <tRule>Ρ ↔ R ;</tRule> + <comment># insert separator before things that turn into s</comment> + <tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule> + <comment># special S variants</comment> + <tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule> + <tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule> + <tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule> + <tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule> + <comment># underbar means exception</comment> + <comment># before a letter, initial</comment> + <tRule>ς } $beforeLetter ↔ s $underbar } $beforeLetter;</tRule> + <tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule> + <comment># otherwise, after a letter = final</comment> + <tRule>$afterLetter { σ ↔ $afterLetter { s $underbar;</tRule> + <tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule> + <comment># otherwise (isolated) = initial</comment> + <tRule>ς ↔ s $underbar;</tRule> + <tRule>σ ↔ s ;</tRule> + <comment># [Pp] { Σ ↔ \'S ;</comment> + <tRule>Σ ↔ S ;</tRule> + <tRule>τ ↔ t ;</tRule> + <tRule>Τ ↔ T ;</tRule> + <tRule>$vowel {υ } ↔ u ;</tRule> + <tRule>υ ↔ y ;</tRule> + <tRule>$vowel { Υ ↔ U ;</tRule> + <tRule>Υ ↔ Y ;</tRule> + <tRule>χ ↔ ch ;</tRule> + <tRule>Χ } $beforeLower ↔ Ch ;</tRule> + <tRule>Χ ↔ CH ;</tRule> + <comment># Completeness for ASCII</comment> + <tRule>$ignore = [[:Mark:]''] * ;</tRule> + <tRule>| k ← c ;</tRule> + <tRule>| ph ← f ;</tRule> + <tRule>| i ← j ;</tRule> + <tRule>| k ← q ;</tRule> + <tRule>| b ← v } $vowel ;</tRule> + <tRule>| b ← w } $vowel;</tRule> + <tRule>| u ← v ;</tRule> + <tRule>| u ← w;</tRule> + <tRule>| K ← C ;</tRule> + <tRule>| Ph ← F ;</tRule> + <tRule>| I ← J ;</tRule> + <tRule>| K ← Q ;</tRule> + <tRule>| B ← V } $vowel ;</tRule> + <tRule>| B ← W } $vowel ;</tRule> + <tRule>| U ← V ;</tRule> + <tRule>| U ← W ;</tRule> + <tRule>$rough } $ignore [:UppercaseLetter:] → H ;</tRule> + <tRule>$ignore [:UppercaseLetter:] { $rough → H ;</tRule> + <tRule>$rough ← H ;</tRule> + <tRule>$rough ↔ h ;</tRule> + <comment># Completeness for Greek</comment> + <tRule>ϐ → | β ;</tRule> + <tRule>ϑ → | θ ;</tRule> + <tRule>ϒ → | Υ ;</tRule> + <tRule>ϕ → | φ ;</tRule> + <tRule>ϖ → | π ;</tRule> + <tRule>ϰ → | κ ;</tRule> + <tRule>ϱ → | ρ ;</tRule> + <tRule>ϲ → | σ ;</tRule> + <tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule> + <tRule>ϳ → j ;</tRule> + <tRule>ϴ → | Θ ;</tRule> + <tRule>ϵ → | ε ;</tRule> + <tRule>µ → | μ ;</tRule> + <tRule>ͺ → i;</tRule> + <comment># delete any trailing ' marks used for roundtripping</comment> + <tRule>← [Ππ] { \' } [Ss] ;</tRule> + <tRule>← [Νν] { \' } $egammaLike ;</tRule> + <tRule>::NFC (NFD) ;</tRule> + <comment># ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;</comment> + <comment># ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;</comment> + <comment># MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD</comment> + <tRule>:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;</tRule> + </transform> + </transforms> +</supplementalData> + |