summaryrefslogtreecommitdiff
path: root/src/zope/i18n/locales/data/transforms/Greek-Latin.xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/zope/i18n/locales/data/transforms/Greek-Latin.xml')
-rw-r--r--src/zope/i18n/locales/data/transforms/Greek-Latin.xml257
1 files changed, 257 insertions, 0 deletions
diff --git a/src/zope/i18n/locales/data/transforms/Greek-Latin.xml b/src/zope/i18n/locales/data/transforms/Greek-Latin.xml
new file mode 100644
index 0000000..5b59d14
--- /dev/null
+++ b/src/zope/i18n/locales/data/transforms/Greek-Latin.xml
@@ -0,0 +1,257 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd">
+<supplementalData>
+ <transforms>
+ <transform source="Greek" target="Latin" direction="both">
+ <comment># Rules are predicated on running NFD first, and NFC afterwards</comment>
+ <comment># :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;</comment>
+ <comment># MINIMAL FILTER GENERATED FOR: Greek-Latin</comment>
+ <tRule>:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;</tRule>
+ <tRule>:: NFD (NFC) ;</tRule>
+ <comment># TEST CASES</comment>
+ <comment># Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος</comment>
+ <comment># ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ</comment>
+ <comment># ᾳ ῃ ῳ ὃ ὄ</comment>
+ <comment># ὠς ὡς ὢς ὣς</comment>
+ <comment># Ὠς Ὡς Ὢς Ὣς</comment>
+ <comment># ὨΣ ὩΣ ὪΣ ὫΣ</comment>
+ <comment># Ạ, ạ, Ẹ, ẹ, Ọ, ọ</comment>
+ <comment># Useful variables</comment>
+ <tRule>$lower = [[:latin:][:greek:] &amp; [:Ll:]];</tRule>
+ <tRule>$glower = [[:greek:] &amp; [:Ll:]];</tRule>
+ <tRule>$upper = [[:latin:][:greek:] &amp; [:Lu:]] ;</tRule>
+ <tRule>$accent = [:M:] ;</tRule>
+ <comment># NOTE: restrict to just the Greek &amp; Latin accents that we care about</comment>
+ <comment># TODO: broaden out once interation is fixed</comment>
+ <tRule>$accentMinus = [ [̀-ͅ] &amp; [:M:] - [̸]] ;</tRule>
+ <tRule>$macron = ̄ ;</tRule>
+ <tRule>$ddot = ̈ ;</tRule>
+ <tRule>$ddotmac = [$ddot$macron];</tRule>
+ <tRule>$lcgvowel = [αεηιουω] ;</tRule>
+ <tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule>
+ <tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule>
+ <tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule>
+ <tRule>$evowel = [aeiouyAEIOUY];</tRule>
+ <tRule>$evowel2 = [iuyIUY];</tRule>
+ <tRule>$vowel = [ $evowel $gvowel] ;</tRule>
+ <tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule>
+ <tRule>$egammaLike = [GKXCgkxc] ;</tRule>
+ <tRule>$smooth = ̓ ;</tRule>
+ <tRule>$rough = ̔ ;</tRule>
+ <tRule>$iotasub = ͅ ;</tRule>
+ <tRule>$evowel_i = [$evowel-[iI]] ;</tRule>
+ <tRule>$evowel2_i = [uyUY];</tRule>
+ <tRule>$underbar = ̱;</tRule>
+ <tRule>$afterLetter = [:L:] [[:M:]\']* ;</tRule>
+ <tRule>$beforeLetter = [[:M:]\']* [:L:] ;</tRule>
+ <tRule>$beforeLower = $accent * $lower ;</tRule>
+ <tRule>$notLetter = [^[:L:][:M:]] ;</tRule>
+ <tRule>$under = ̱;</tRule>
+ <comment># Fix punctuation</comment>
+ <comment># preserve original</comment>
+ <tRule>\: ↔ \: $under ;</tRule>
+ <tRule>\? ↔ \? $under ;</tRule>
+ <tRule>\; ↔ \? ;</tRule>
+ <tRule>· ↔ \: ;</tRule>
+ <comment># CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve</comment>
+ <tRule>͂ ↔ ̂ ;</tRule>
+ <comment># IOTA: convert iota subscript to iota</comment>
+ <comment># first make previous alpha long!</comment>
+ <tRule>$accent_minus = [[$accent]-[$iotasub$macron]];</tRule>
+ <tRule>Α } $accent_minus * $iotasub → | Α $macron ;</tRule>
+ <tRule>α } $accent_minus * $iotasub → | α $macron ;</tRule>
+ <comment># now convert to uppercase if after uppercase, ow to lowercase</comment>
+ <tRule>$upper $accent * { $iotasub → I ;</tRule>
+ <tRule>$iotasub → i ;</tRule>
+ <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;</tRule>
+ <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;</tRule>
+ <comment># BREATHING</comment>
+ <comment># Convert rough breathing to h, and move before letters.</comment>
+ <comment># Make A ` x = → H a x</comment>
+ <tRule>Α ($macron?) $rough } $beforeLower → H | α $1;</tRule>
+ <tRule>Ε $rough } $beforeLower → H | ε;</tRule>
+ <tRule>Η $rough } $beforeLower → H | η ;</tRule>
+ <tRule>Ι ($ddot?) $rough } $beforeLower → H | ι $1;</tRule>
+ <tRule>Ο $rough } $beforeLower → H | ο ;</tRule>
+ <tRule>Υ $rough } $beforeLower → H | υ ;</tRule>
+ <tRule>Ω ($ddot?) $rough } $beforeLower → H | ω $1;</tRule>
+ <comment># Make A x ` = → H a x</comment>
+ <tRule>Α ($glower $macron?) $rough → H | α $1 ;</tRule>
+ <tRule>Ε ($glower) $rough → H | ε $1 ;</tRule>
+ <tRule>Η ($glower) $rough → H | η $1 ;</tRule>
+ <tRule>Ι ($glower $ddot?) $rough → H | ι $1 ;</tRule>
+ <tRule>Ο ($glower) $rough → H | ο $1 ;</tRule>
+ <tRule>Υ ($glower) $rough → H | υ $1 ;</tRule>
+ <tRule>Ω ($glower $ddot?) $rough → H | ω $1 ;</tRule>
+ <comment>#Otherwise, make x ` into h x and X ` into H X</comment>
+ <tRule>($lcgvowel + $ddotmac? ) $rough → h | $1 ;</tRule>
+ <tRule>($gvowel + $ddotmac? ) $rough → H | $1 ;</tRule>
+ <comment># Go backwards with H</comment>
+ <tRule>| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| $1 $rough ← h ($evowel $macron? $ddot?) ;</tRule>
+ <tRule>| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;</tRule>
+ <comment># titlecase, have to fix individually</comment>
+ <comment># in the future, we should add &amp;uppercase() to make this easier</comment>
+ <tRule>| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;</tRule>
+ <tRule>| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;</tRule>
+ <tRule>| A $1 $rough ← H a ($macron? $ddot? ) ;</tRule>
+ <tRule>| E $1 $rough ← H e ($macron? $ddot? ) ;</tRule>
+ <tRule>| I $1 $rough ← H i ($macron? $ddot? ) ;</tRule>
+ <tRule>| O $1 $rough ← H o ($macron? $ddot? ) ;</tRule>
+ <tRule>| U $1 $rough ← H u ($macron? $ddot? ) ;</tRule>
+ <tRule>| Y $1 $rough ← H y ($macron? $ddot? ) ;</tRule>
+ <comment># Now do smooth</comment>
+ <comment>#delete smooth breathing for Latin</comment>
+ <tRule>$smooth → ;</tRule>
+ <comment># insert in Greek</comment>
+ <comment># the assumption is that all Marks are on letters.</comment>
+ <tRule>| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;</tRule>
+ <tRule>| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;</tRule>
+ <tRule>| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;</tRule>
+ <comment># TODO: preserve smooth/rough breathing if not</comment>
+ <comment># on initial vowel sequence</comment>
+ <comment># need to have these up here so the rules don't mask</comment>
+ <comment># remove now superfluous macron when returning</comment>
+ <tRule>Α ← A $macron ;</tRule>
+ <tRule>α ← a $macron ;</tRule>
+ <tRule>η ↔ e $macron ;</tRule>
+ <tRule>Η ↔ E $macron ;</tRule>
+ <tRule>φ ↔ ph ;</tRule>
+ <tRule>Ψ } $beforeLower ↔ Ps ;</tRule>
+ <tRule>Ψ ↔ PS ;</tRule>
+ <tRule>Φ } $beforeLower ↔ Ph ;</tRule>
+ <tRule>Φ ↔ PH ;</tRule>
+ <tRule>ψ ↔ ps ;</tRule>
+ <tRule>ω ↔ o $macron ;</tRule>
+ <tRule>Ω ↔ O $macron;</tRule>
+ <comment># NORMAL</comment>
+ <tRule>α ↔ a ;</tRule>
+ <tRule>Α ↔ A ;</tRule>
+ <tRule>β ↔ b ;</tRule>
+ <tRule>Β ↔ B ;</tRule>
+ <tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule>
+ <tRule>γ ↔ g ;</tRule>
+ <tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule>
+ <tRule>Γ ↔ G ;</tRule>
+ <tRule>δ ↔ d ;</tRule>
+ <tRule>Δ ↔ D ;</tRule>
+ <tRule>ε ↔ e ;</tRule>
+ <tRule>Ε ↔ E ;</tRule>
+ <tRule>ζ ↔ z ;</tRule>
+ <tRule>Ζ ↔ Z ;</tRule>
+ <tRule>θ ↔ th ;</tRule>
+ <tRule>Θ } $beforeLower ↔ Th ;</tRule>
+ <tRule>Θ ↔ TH ;</tRule>
+ <tRule>ι ↔ i ;</tRule>
+ <tRule>Ι ↔ I ;</tRule>
+ <tRule>κ ↔ k ;</tRule>
+ <tRule>Κ ↔ K ;</tRule>
+ <tRule>λ ↔ l ;</tRule>
+ <tRule>Λ ↔ L ;</tRule>
+ <tRule>μ ↔ m ;</tRule>
+ <tRule>Μ ↔ M ;</tRule>
+ <tRule>ν } $gammaLike → n\' ;</tRule>
+ <tRule>ν ↔ n ;</tRule>
+ <tRule>Ν } $gammaLike ↔ N\' ;</tRule>
+ <tRule>Ν ↔ N ;</tRule>
+ <tRule>ξ ↔ x ;</tRule>
+ <tRule>Ξ ↔ X ;</tRule>
+ <tRule>ο ↔ o ;</tRule>
+ <tRule>Ο ↔ O ;</tRule>
+ <tRule>π ↔ p ;</tRule>
+ <tRule>Π ↔ P ;</tRule>
+ <tRule>ρ $rough ↔ rh;</tRule>
+ <tRule>Ρ $rough } $beforeLower ↔ Rh ;</tRule>
+ <tRule>Ρ $rough ↔ RH ;</tRule>
+ <tRule>ρ ↔ r ;</tRule>
+ <tRule>Ρ ↔ R ;</tRule>
+ <comment># insert separator before things that turn into s</comment>
+ <tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule>
+ <comment># special S variants</comment>
+ <tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule>
+ <tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule>
+ <tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule>
+ <tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule>
+ <comment># underbar means exception</comment>
+ <comment># before a letter, initial</comment>
+ <tRule>ς } $beforeLetter ↔ s $underbar } $beforeLetter;</tRule>
+ <tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule>
+ <comment># otherwise, after a letter = final</comment>
+ <tRule>$afterLetter { σ ↔ $afterLetter { s $underbar;</tRule>
+ <tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule>
+ <comment># otherwise (isolated) = initial</comment>
+ <tRule>ς ↔ s $underbar;</tRule>
+ <tRule>σ ↔ s ;</tRule>
+ <comment># [Pp] { Σ ↔ \'S ;</comment>
+ <tRule>Σ ↔ S ;</tRule>
+ <tRule>τ ↔ t ;</tRule>
+ <tRule>Τ ↔ T ;</tRule>
+ <tRule>$vowel {υ } ↔ u ;</tRule>
+ <tRule>υ ↔ y ;</tRule>
+ <tRule>$vowel { Υ ↔ U ;</tRule>
+ <tRule>Υ ↔ Y ;</tRule>
+ <tRule>χ ↔ ch ;</tRule>
+ <tRule>Χ } $beforeLower ↔ Ch ;</tRule>
+ <tRule>Χ ↔ CH ;</tRule>
+ <comment># Completeness for ASCII</comment>
+ <tRule>$ignore = [[:Mark:]''] * ;</tRule>
+ <tRule>| k ← c ;</tRule>
+ <tRule>| ph ← f ;</tRule>
+ <tRule>| i ← j ;</tRule>
+ <tRule>| k ← q ;</tRule>
+ <tRule>| b ← v } $vowel ;</tRule>
+ <tRule>| b ← w } $vowel;</tRule>
+ <tRule>| u ← v ;</tRule>
+ <tRule>| u ← w;</tRule>
+ <tRule>| K ← C ;</tRule>
+ <tRule>| Ph ← F ;</tRule>
+ <tRule>| I ← J ;</tRule>
+ <tRule>| K ← Q ;</tRule>
+ <tRule>| B ← V } $vowel ;</tRule>
+ <tRule>| B ← W } $vowel ;</tRule>
+ <tRule>| U ← V ;</tRule>
+ <tRule>| U ← W ;</tRule>
+ <tRule>$rough } $ignore [:UppercaseLetter:] → H ;</tRule>
+ <tRule>$ignore [:UppercaseLetter:] { $rough → H ;</tRule>
+ <tRule>$rough ← H ;</tRule>
+ <tRule>$rough ↔ h ;</tRule>
+ <comment># Completeness for Greek</comment>
+ <tRule>ϐ → | β ;</tRule>
+ <tRule>ϑ → | θ ;</tRule>
+ <tRule>ϒ → | Υ ;</tRule>
+ <tRule>ϕ → | φ ;</tRule>
+ <tRule>ϖ → | π ;</tRule>
+ <tRule>ϰ → | κ ;</tRule>
+ <tRule>ϱ → | ρ ;</tRule>
+ <tRule>ϲ → | σ ;</tRule>
+ <tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule>
+ <tRule>ϳ → j ;</tRule>
+ <tRule>ϴ → | Θ ;</tRule>
+ <tRule>ϵ → | ε ;</tRule>
+ <tRule>µ → | μ ;</tRule>
+ <tRule>ͺ → i;</tRule>
+ <comment># delete any trailing ' marks used for roundtripping</comment>
+ <tRule>← [Ππ] { \' } [Ss] ;</tRule>
+ <tRule>← [Νν] { \' } $egammaLike ;</tRule>
+ <tRule>::NFC (NFD) ;</tRule>
+ <comment># ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;</comment>
+ <comment># ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;</comment>
+ <comment># MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD</comment>
+ <tRule>:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;</tRule>
+ </transform>
+ </transforms>
+</supplementalData>
+