summaryrefslogtreecommitdiff
path: root/src/zope/i18n/locales/data/transforms/Arabic-Latin.xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/zope/i18n/locales/data/transforms/Arabic-Latin.xml')
-rw-r--r--src/zope/i18n/locales/data/transforms/Arabic-Latin.xml132
1 files changed, 132 insertions, 0 deletions
diff --git a/src/zope/i18n/locales/data/transforms/Arabic-Latin.xml b/src/zope/i18n/locales/data/transforms/Arabic-Latin.xml
new file mode 100644
index 0000000..841858a
--- /dev/null
+++ b/src/zope/i18n/locales/data/transforms/Arabic-Latin.xml
@@ -0,0 +1,132 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd">
+<supplementalData>
+ <transforms>
+ <transform source="Arabic" target="Latin" direction="both">
+ <comment># Generally follows UNGEGN ←http://www.eki.ee/wgrs/rom1_ar.pdf→</comment>
+ <comment># Occasionally deviates in the direction of ISO 233 ←http://homepage.mac.com/sirbinks/pdf/Arabic.pdf→</comment>
+ <comment># a) where required for disambiguation.</comment>
+ <comment># b) with underdot instead of cedilla for letter like SAD, since</comment>
+ <comment># those are explicitly in Unicode for transliteration.</comment>
+ <comment># c) with extra non-Arabic-language letters, like PEH</comment>
+ <comment># Does *not* do assimilation of &quot;al&quot;, nor hyphenation.</comment>
+ <comment># While it could be done, we need to determine whether a prefix &quot;al&quot; could</comment>
+ <comment># occur other than as the definite article (since no space is used).</comment>
+ <tRule>:: [[:Arabic:] [‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;</tRule>
+ <tRule>:: NFKD (NFC);</tRule>
+ <tRule>$disambig = ̱ ;</tRule>
+ <tRule>$disambig2 = ̰ ;</tRule>
+ <tRule>$under = ̣ ;</tRule>
+ <tRule>$descender = ˌ;</tRule>
+ <tRule>$notAbove = [[:^ccc=0:]&amp;[:^ccc=230:]];</tRule>
+ <comment># non-letters</comment>
+ <tRule>٫ ↔ '.' $disambig ; # ARABIC DECIMAL SEPARATOR</tRule>
+ <tRule>٬ ↔ ',' $disambig ; # ARABIC THOUSANDS SEPARATOR</tRule>
+ <comment># ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate</comment>
+ <tRule>، ↔ ',' ; # ARABIC COMMA</tRule>
+ <tRule>؛ ↔ ';' ; # ARABIC SEMICOLON</tRule>
+ <tRule>؟ ↔ '?' ; # ARABIC QUESTION MARK</tRule>
+ <tRule>٪ ↔ '%' ; # ARABIC PERCENT SIGN</tRule>
+ <tRule>۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO</tRule>
+ <tRule>۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE</tRule>
+ <tRule>۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO</tRule>
+ <tRule>۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE</tRule>
+ <tRule>۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR</tRule>
+ <tRule>۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE</tRule>
+ <tRule>۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX</tRule>
+ <tRule>۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN</tRule>
+ <tRule>۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT</tRule>
+ <tRule>۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE</tRule>
+ <tRule>٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO</tRule>
+ <tRule>١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE</tRule>
+ <tRule>٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO</tRule>
+ <tRule>٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE</tRule>
+ <tRule>٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR</tRule>
+ <tRule>٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE</tRule>
+ <tRule>٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX</tRule>
+ <tRule>٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN</tRule>
+ <tRule>٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT</tRule>
+ <tRule>٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE</tRule>
+ <comment># letters</comment>
+ <comment># long vowels</comment>
+ <tRule>َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF</tRule>
+ <tRule>ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW</tRule>
+ <tRule>ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH</tRule>
+ <comment># longer items moved here to prevent masking</comment>
+ <tRule>ث ↔ t h $disambig ; # ARABIC LETTER THEH</tRule>
+ <tRule>ذ ↔ d h $disambig ; # ARABIC LETTER THAL</tRule>
+ <tRule>ش ↔ s h $disambig ; # ARABIC LETTER SHEEN</tRule>
+ <tRule>ص ↔ s $under ; # ARABIC LETTER SAD</tRule>
+ <tRule>ض ↔ d $under ; # ARABIC LETTER DAD</tRule>
+ <tRule>ط ↔ t $under ; # ARABIC LETTER TAH</tRule>
+ <tRule>ظ ↔ z $under ; # ARABIC LETTER ZAH</tRule>
+ <tRule>غ ↔ g h $disambig ; # ARABIC LETTER GHAIN</tRule>
+ <comment># WARNING: special case</comment>
+ <comment># ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→</comment>
+ <comment># so on the return, we have to skip over (but preserve) the half-ring below (or others like it)</comment>
+ <comment># ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS</comment>
+ <tRule>ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA</tRule>
+ <tRule>ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA</tRule>
+ <comment># non-Arabic language</comment>
+ <tRule>ژ ↔ z h $disambig ; # ARABIC LETTER JEH</tRule>
+ <tRule>ڭ ↔ n $disambig g ; # ARABIC LETTER NG</tRule>
+ <tRule>ۋ ↔ v $disambig ; # ARABIC LETTER VE</tRule>
+ <tRule>ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH</tRule>
+ <tRule>ښ ↔ s $descender;</tRule>
+ <comment># Arabic language</comment>
+ <tRule>ء ↔ ʾ ; # ARABIC LETTER HAMZA</tRule>
+ <tRule>ا ↔ a $under; # ARABIC LETTER ALEF</tRule>
+ <tRule>ب ↔ b ; # ARABIC LETTER BEH</tRule>
+ <tRule>ت ↔ t ; # ARABIC LETTER TEH</tRule>
+ <tRule>ج ↔ j ; # ARABIC LETTER JEEM</tRule>
+ <tRule>ح ↔ h $under ; # ARABIC LETTER HAH</tRule>
+ <tRule>خ ↔ k h $disambig ; # ARABIC LETTER KHAH</tRule>
+ <tRule>د ↔ d ; # ARABIC LETTER DAL</tRule>
+ <tRule>ر ↔ r ; # ARABIC LETTER REH</tRule>
+ <tRule>ز ↔ z ; # ARABIC LETTER ZAIN</tRule>
+ <tRule>س ↔ s ; # ARABIC LETTER SEEN</tRule>
+ <tRule>ع ↔ ʿ ; # ARABIC LETTER AIN</tRule>
+ <tRule>ـ → ; # ARABIC TATWEEL</tRule>
+ <tRule>ف ↔ f ; # ARABIC LETTER FEH</tRule>
+ <tRule>ق ↔ q ; # ARABIC LETTER QAF</tRule>
+ <tRule>ك ↔ k ; # ARABIC LETTER KAF</tRule>
+ <tRule>ل ↔ l ; # ARABIC LETTER LAM</tRule>
+ <tRule>م ↔ m ; # ARABIC LETTER MEEM</tRule>
+ <tRule>ن ↔ n ; # ARABIC LETTER NOON</tRule>
+ <tRule>ه ↔ h ; # ARABIC LETTER HEH</tRule>
+ <tRule>و ↔ w ; # ARABIC LETTER WAW</tRule>
+ <tRule>ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA</tRule>
+ <tRule>ي ↔ y ; # ARABIC LETTER YEH</tRule>
+ <tRule>ً ↔ aⁿ ; # ARABIC FATHATAN</tRule>
+ <tRule>ٌ ↔ uⁿ ; # ARABIC DAMMATAN</tRule>
+ <tRule>ٍ ↔ iⁿ ; # ARABIC KASRATAN</tRule>
+ <tRule>َ ↔ a ; # ARABIC FATHA</tRule>
+ <tRule>ُ ↔ u ; # ARABIC DAMMA</tRule>
+ <tRule>ِ ↔ i ; # ARABIC KASRA</tRule>
+ <tRule>ّ ↔ ̃ ; # ARABIC SHADDA</tRule>
+ <tRule>ْ ↔ ̊ ; # ARABIC SUKUN</tRule>
+ <comment># special combining marks</comment>
+ <tRule>ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE</tRule>
+ <tRule>ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE</tRule>
+ <tRule>ٕ ↔ ̹ ; # ARABIC HAMZA BELOW</tRule>
+ <comment># Some non-Arabic language (not in UNGEGN)</comment>
+ <tRule>پ ↔ p ; # ARABIC LETTER PEH</tRule>
+ <tRule>چ ↔ c h $disambig ; # ARABIC LETTER TCHEH</tRule>
+ <tRule>ڤ ↔ v ; # ARABIC LETTER VEH</tRule>
+ <comment># ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW</comment>
+ <comment># ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW</comment>
+ <tRule>گ ↔ g ; # ARABIC LETTER GAF</tRule>
+ <comment># fallbacks</comment>
+ <tRule>| s ← c } [eiy];</tRule>
+ <tRule>| k ← c ;</tRule>
+ <tRule>| i ← e ;</tRule>
+ <tRule>| u ← o ;</tRule>
+ <tRule>| ks ← x ;</tRule>
+ <tRule>| n ← ‎ⁿ;</tRule>
+ <tRule>:: (lower) ;</tRule>
+ <tRule>::NFC (NFD);</tRule>
+ <tRule>:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );</tRule>
+ </transform>
+ </transforms>
+</supplementalData>
+