diff options
Diffstat (limited to 'src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml')
-rw-r--r-- | src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml b/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml new file mode 100644 index 0000000..de3980c --- /dev/null +++ b/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml @@ -0,0 +1,489 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd"> +<supplementalData> + <transforms> + <transform source="Latin" target="ConjoiningJamo" direction="both" visibility="internal"> + <comment># Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303 + </comment> + <comment>#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in</comment> + <comment>#- the INDEX file. This transliterator is, by itself, not</comment> + <comment>#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or</comment> + <comment>#- inverses thereof.</comment> + <comment># Transliteration from Latin characters to Korean script is done in</comment> + <comment># two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul</comment> + <comment># transliteration is done algorithmically following Unicode 3.0</comment> + <comment># section 3.11. This file implements the Latin to Jamo</comment> + <comment># transliteration using rules.</comment> + <comment># Jamo occupy the block 1100-11FF. Within this block there are three</comment> + <comment># groups of characters: initial consonants or choseong (I), medial</comment> + <comment># vowels or jungseong (M), and trailing consonants or jongseong (F).</comment> + <comment># Standard Korean syllables are of the form I+M+F*.</comment> + <comment># Section 3.11 describes the use of 'filler' jamo to convert</comment> + <comment># nonstandard syllables to standard form: the choseong filler 115F and</comment> + <comment># the junseong filler 1160. 
In this transliterator, we will not use</comment> + <comment># 115F or 1160.</comment> + <comment># We will, however, insert two 'null' jamo to make foreign words</comment> + <comment># conform to Korean syllable structure. These are the null initial</comment> + <comment># consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,</comment> + <comment># we will use the separator in order to disambiguate strings,</comment> + <comment># e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).</comment> + <comment># We will not use all of the characters in the jamo block. We will</comment> + <comment># only use the 19 initials, 21 medials, and 27 finals possessing a</comment> + <comment># jamo short name as defined in section 4.4 of the Unicode book.</comment> + <comment># Rules of thumb. These guidelines provide the basic framework</comment> + <comment># for the rules. They are phrased in terms of Latin-Jamo transliteration.</comment> + <comment># The Jamo-Latin rules derive from these, since the Jamo-Latin rules are</comment> + <comment># just context-free transliteration of jamo to corresponding short names,</comment> + <comment># with the addition of separators to maintain round-trip integrity</comment> + <comment># in the context of the Latin-Jamo rules.</comment> + <comment># A sequence of vowels:</comment> + <comment># - Take the longest sequence you can. If there are too many, or you don't</comment> + <comment># have a starting consonant, introduce a 110B as necessary.</comment> + <comment># A sequence of consonants.</comment> + <comment># - First join the double consonants: G + G -→ GG</comment> + <comment># - In the remaining list,</comment> + <comment># -- If there is no preceding vowel, take the first consonant, and insert EU</comment> + <comment># after it. 
Continue with the rest of the consonants.</comment> + <comment># -- If there is one consonant, attach to the following vowel</comment> + <comment># -- If there are two consonants and a following vowel, attach one to the</comment> + <comment># preceding vowel, and one to the following vowel.</comment> + <comment># -- If there are more than two consonants, join the first two together if you</comment> + <comment># can: L + G =→ LG</comment> + <comment># -- If you still end up with more than 2 consonants, insert EU after the</comment> + <comment># first one, and continue with the rest of the consonants.</comment> + <comment>#----------------------------------------------------------------------</comment> + <comment># Variables</comment> + <comment># Some latin consonants or consonant pairs only occur as initials, and</comment> + <comment># some only as finals, but some occur as both. This makes some jamo</comment> + <comment># consonants ambiguous when transliterated into latin.</comment> + + <comment># Initial only: IEUNG BB DD JJ R</comment> + <comment># Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ</comment> + <comment># Initial and Final: B C D G GG H J K M N P S SS T</comment> + <tRule>$Gi = ᄀ;</tRule> + <tRule>$KKi = ᄁ;</tRule> + <tRule>$Ni = ᄂ;</tRule> + <tRule>$Di = ᄃ;</tRule> + <tRule>$TTi = ᄄ;</tRule> + <tRule>$Li = ᄅ;</tRule> + <tRule>$Mi = ᄆ;</tRule> + <tRule>$Bi = ᄇ;</tRule> + <tRule>$PPi = ᄈ;</tRule> + <tRule>$Si = ᄉ;</tRule> + <tRule>$SSi = ᄊ;</tRule> + <tRule>$IEUNG = ᄋ; # null initial, inserted during Latin-Jamo</tRule> + <tRule>$Ji = ᄌ;</tRule> + <tRule>$JJi = ᄍ;</tRule> + <tRule>$CHi = ᄎ;</tRule> + <tRule>$Ki = ᄏ;</tRule> + <tRule>$Ti = ᄐ;</tRule> + <tRule>$Pi = ᄑ;</tRule> + <tRule>$Hi = ᄒ;</tRule> + + <tRule>$A = ᅡ;</tRule> + <tRule>$AE = ᅢ;</tRule> + <tRule>$YA = ᅣ;</tRule> + <tRule>$YAE = ᅤ;</tRule> + <tRule>$EO = ᅥ;</tRule> + <tRule>$E = ᅦ;</tRule> + <tRule>$YEO = ᅧ;</tRule> + <tRule>$YE = ᅨ;</tRule> + <tRule>$O = ᅩ;</tRule> + <tRule>$WA = 
ᅪ;</tRule> + <tRule>$WAE = ᅫ;</tRule> + <tRule>$OE = ᅬ;</tRule> + <tRule>$YO = ᅭ;</tRule> + <tRule>$U = ᅮ;</tRule> + <tRule>$WO = ᅯ;</tRule> + <tRule>$WE = ᅰ;</tRule> + <tRule>$WI = ᅱ;</tRule> + <tRule>$YU = ᅲ;</tRule> + <tRule>$EU = ᅳ; # null medial, inserted during Latin-Jamo</tRule> + <tRule>$UI = ᅴ;</tRule> + <tRule>$I = ᅵ;</tRule> + + <tRule>$Gf = ᆨ;</tRule> + <tRule>$GGf = ᆩ;</tRule> + <tRule>$GS = ᆪ;</tRule> + <tRule>$Nf = ᆫ;</tRule> + <tRule>$NJ = ᆬ;</tRule> + <tRule>$NH = ᆭ;</tRule> + <tRule>$Df = ᆮ;</tRule> + <tRule>$L = ᆯ;</tRule> + <tRule>$LG = ᆰ;</tRule> + <tRule>$LM = ᆱ;</tRule> + <tRule>$LB = ᆲ;</tRule> + <tRule>$LS = ᆳ;</tRule> + <tRule>$LT = ᆴ;</tRule> + <tRule>$LP = ᆵ;</tRule> + <tRule>$LH = ᆶ;</tRule> + <tRule>$Mf = ᆷ;</tRule> + <tRule>$Bf = ᆸ;</tRule> + <tRule>$BS = ᆹ;</tRule> + <tRule>$Sf = ᆺ;</tRule> + <tRule>$SSf = ᆻ;</tRule> + <tRule>$NG = ᆼ;</tRule> + <tRule>$Jf = ᆽ;</tRule> + <tRule>$Cf = ᆾ;</tRule> + <tRule>$Kf = ᆿ;</tRule> + <tRule>$Tf = ᇀ;</tRule> + <tRule>$Pf = ᇁ;</tRule> + <tRule>$Hf = ᇂ;</tRule> + + <tRule>$jamoInitial = [ᄀ-ᄒ];</tRule> + <tRule>$jamoMedial = [ᅡ-ᅵ];</tRule> + <tRule>$latinInitial = [bcdghjklmnprst];</tRule> + + <comment># Any character in the latin transliteration of a medial</comment> + <tRule>$latinMedial = [aeiouwy];</tRule> + + <comment># The last character of the latin transliteration of a medial</comment> + <tRule>$latinMedialEnd = [aeiou];</tRule> + + <comment># Disambiguation separator</comment> + <tRule>$sep = \-;</tRule> + + <comment>#----------------------------------------------------------------------</comment> + <comment># Jamo-Latin</comment> + <comment># Jamo to latin is relatively simple, since it is the latin that is</comment> + <comment># ambiguous. 
Most rules are straightforward, and we encode them below</comment> + <comment># as simple add-on back rule, e.g.:</comment> + <comment># $jamoMedial {bs} → $BS;</comment> + <comment># becomes</comment> + <comment># $jamoMedial {bs} ↔ $BS;</comment> + <comment># Furthermore, we don't care about the ordering for Jamo-Latin because</comment> + <comment># we are going from single characters, so we can very easily piggyback</comment> + <comment># on the Latin-Jamo.</comment> + <comment># The main issue with Jamo-Latin is when to insert separators.</comment> + <comment># Separators are inserted to obtain correct round trip behavior. For</comment> + <comment># example, the sequence Ki A Gf Gi E, if transliterated to "kagge",</comment> + <comment># would then round trip to Ki A GGi E. To prevent this, we insert a</comment> + <comment># separator: "kag-ge". IMPORTANT: The need for separators depends</comment> + <comment># very specifically on the behavior of the Latin-Jamo rules. A change</comment> + <comment># in the Latin-Jamo behavior can completely change the way the</comment> + <comment># separator insertion must be done.</comment> + + <comment># First try to preserve actual separators in the jamo text by doubling</comment> + <comment># them. This fixes problems like:</comment> + <comment># (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) =→ dajung-yeongyeol</comment> + <comment># =→ (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional</comment> + <comment># -- if we don't care about losing separators in the jamo, we can delete</comment> + <comment># this rule.</comment> + <tRule>$sep $sep ↔ $sep;</tRule> + + <comment># Triple consonants. For three consonants "axxx" we insert a</comment> + <comment># separator between the first and second "x" if XXf, Xf, and Xi all</comment> + <comment># exist, and we have A Xf XXi. 
This prevents the reverse</comment> + <comment># transliteration to A XXf Xi.</comment> + + <tRule>$sep ← $latinMedialEnd s {} $SSi;</tRule> + + <comment># For vowels the rule is similar. If there is a vowel "ae" such that</comment> + <comment># "a" by itself and "e" by itself are vowels, then we want to map A E</comment> + <comment># to "a-e" so as not to round trip to AE. However, in the text Ki EO</comment> + <comment># IEUNG E we don't need to map to "keo-e". "keoe" suffices. For</comment> + <comment># vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be</comment> + <comment># tested. NOTE: These rules used to have a left context of</comment> + <comment># $latinInitial instead of [^$latinMedial]. The problem with this is</comment> + <comment># sequences where an initial IEUNG is transliterated away:</comment> + <comment># (IEUNG)(A)(IEUNG)(EO) =→ aeo =→ (IEUNG)(AE)(IEUNG)(O)</comment> + <tRule>$sep ← [^$latinMedial] [y] e {} [$O $OE];</tRule> + <tRule>$sep ← [^$latinMedial] e {} [$U $UI];</tRule> + <tRule>$sep ← [^$latinMedial] [o a] {} [$E $EO $EU];</tRule> + <tRule>$sep ← [^$latinMedial] [w y] a {} [$E $EO $EU];</tRule> + <tRule>$sep ← [^$latinMedial] [u] {} [$I];</tRule> + + <comment># Similar to the above, but with an intervening $IEUNG.</comment> + + <tRule>$sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE];</tRule> + <tRule>$sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U];</tRule> + + <tRule>$sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];</tRule> + <tRule>$sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];</tRule> + + <comment># Single finals followed by IEUNG. 
The jamo sequence A Xf IEUNG E,</comment> + <comment># where Xi also exists, must be transliterated as "ax-e" to prevent</comment> + <comment># the round trip conversion to A Xi E.</comment> + <tRule>$sep ← $latinMedialEnd b {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd d {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd g {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd h {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd j {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd k {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd m {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd n {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd p {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd s {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd t {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l {} $IEUNG $jamoMedial;</tRule> + + <comment># Double finals followed by IEUNG. Similar to the single finals</comment> + <comment># followed by IEUNG. 
Any latin consonant pair X Y, between medials,</comment> + <comment># that we would split by Latin-Jamo, we must handle when it occurs as</comment> + <comment># part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E</comment> + <tRule>$sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial;</tRule> + + <comment># Split doubles. Text of the form A Xf Xi E, where XXi also occurs,</comment> + <comment># we transliterate as "ax-xe" to prevent round trip transliteration as</comment> + <comment># A XXi E.</comment> + + <tRule>$sep ← $latinMedialEnd j {} $Ji $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd k {} $Ki $jamoMedial;</tRule> + <tRule>$sep ← $latinMedialEnd s {} $Si $jamoMedial;</tRule> + + <comment># XYY. This corresponds to the XYY rule in Latin-Jamo. By default</comment> + <comment># Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. 
As a result,</comment> + <comment># "xyy" forms that correspond to XYf Yi must be transliterated as</comment> + <comment># "xy-y".</comment> + <tRule>$sep ← $latinMedialEnd b s {} [$Si $SSi];</tRule> + <tRule>$sep ← $latinMedialEnd g s {} [$Si $SSi];</tRule> + <tRule>$sep ← $latinMedialEnd l b {} [$Bi];</tRule> + <tRule>$sep ← $latinMedialEnd l g {} [$Gi];</tRule> + <tRule>$sep ← $latinMedialEnd l s {} [$Si $SSi];</tRule> + <tRule>$sep ← $latinMedialEnd n g {} [$Gi];</tRule> + <tRule>$sep ← $latinMedialEnd n j {} [$Ji $JJi];</tRule> + + <tRule>$sep ← $latinMedialEnd l {} [$PPi];</tRule> + <tRule>$sep ← $latinMedialEnd l {} [$TTi];</tRule> + <tRule>$sep ← $latinMedialEnd k {} [$KKi $Ki];</tRule> + <tRule>$sep ← $latinMedialEnd p {} $Pi;</tRule> + <tRule>$sep ← $latinMedialEnd t {} $Ti;</tRule> + <tRule>$sep ← $latinMedialEnd c {} [$Hi];</tRule> + + <comment># Deletion of IEUNG is handled below.</comment> + <comment>#----------------------------------------------------------------------</comment> + <comment># Latin-Jamo</comment> + <comment># [Basic, context-free Jamo-Latin rules are embedded here too. See</comment> + <comment># above.]</comment> + <comment># Split digraphs: Text of the form 'axye', where 'xy' is a final</comment> + <comment># digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and</comment> + <comment># 'e' are medials, we want to transliterate this as A Xf Yi E rather</comment> + <comment># than A XYf IEUNG E. We do NOT include text of the form "axxe",</comment> + <comment># since that is handled differently below. 
These rules are generated</comment> + <comment># programmatically from the jamo data.</comment> + <tRule>$jamoMedial {b s} $latinMedial → $Bf $Si;</tRule> + <tRule>$jamoMedial {g s} $latinMedial → $Gf $Si;</tRule> + <tRule>$jamoMedial {l b} $latinMedial → $L $Bi;</tRule> + <tRule>$jamoMedial {l g} $latinMedial → $L $Gi;</tRule> + <tRule>$jamoMedial {l h} $latinMedial → $L $Hi;</tRule> + <tRule>$jamoMedial {l m} $latinMedial → $L $Mi;</tRule> + <tRule>$jamoMedial {l p} $latinMedial → $L $Pi;</tRule> + <tRule>$jamoMedial {l s} $latinMedial → $L $Si;</tRule> + <tRule>$jamoMedial {l t} $latinMedial → $L $Ti;</tRule> + <tRule>$jamoMedial {n g} $latinMedial → $Nf $Gi;</tRule> + <tRule>$jamoMedial {n h} $latinMedial → $Nf $Hi;</tRule> + <tRule>$jamoMedial {n j} $latinMedial → $Nf $Ji;</tRule> + + <comment># Single consonants are initials: Text of the form 'axe', where 'x'</comment> + <comment># can be an initial or a final, and 'a' and 'e' are medials, we want</comment> + <comment># to transliterate as A Xi E rather than A Xf IEUNG E.</comment> + <tRule>$jamoMedial {b} $latinMedial → $Bi;</tRule> + <tRule>$jamoMedial {ch} $latinMedial → $CHi;</tRule> + <tRule>$jamoMedial {d} $latinMedial → $Di;</tRule> + <tRule>$jamoMedial {g} $latinMedial → $Gi;</tRule> + <tRule>$jamoMedial {h} $latinMedial → $Hi;</tRule> + <tRule>$jamoMedial {j} $latinMedial → $Ji;</tRule> + <tRule>$jamoMedial {k} $latinMedial → $Ki;</tRule> + <tRule>$jamoMedial {m} $latinMedial → $Mi;</tRule> + <tRule>$jamoMedial {n} $latinMedial → $Ni;</tRule> + <tRule>$jamoMedial {p} $latinMedial → $Pi;</tRule> + <tRule>$jamoMedial {s} $latinMedial → $Si;</tRule> + <tRule>$jamoMedial {t} $latinMedial → $Ti;</tRule> + <tRule>$jamoMedial {l} $latinMedial → $Li;</tRule> + + <comment># Doubled initials. 
The sequence "axxe", where XX exists as an initial</comment> + <comment># (XXi), and also Xi and Xf exist (true of all digraphs XX), we want</comment> + <comment># to transliterate as A XXi E, rather than split to A Xf Xi E.</comment> + <tRule>$jamoMedial {pp} $latinMedial → $PPi;</tRule> + <tRule>$jamoMedial {t t} $latinMedial → $TTi;</tRule> + <tRule>$jamoMedial {j j} $latinMedial → $JJi;</tRule> + <tRule>$jamoMedial {k k} $latinMedial → $KKi;</tRule> + <tRule>$jamoMedial {s s} $latinMedial → $SSi;</tRule> + + <comment># XYY. Because doubled consonants bind more strongly than XY</comment> + <comment># consonants, we must handle the sequence "axyy" specially. Here XYf</comment> + <comment># and YYi must exist. In these cases, we map to Xf YYi rather than</comment> + <comment># XYf.</comment> + <tRule>$jamoMedial {b} s s → $Bf;</tRule> + <tRule>$jamoMedial {g} s s → $Gf;</tRule> + <tRule>$jamoMedial {l} b b → $L;</tRule> + <tRule>$jamoMedial {l} g g → $L;</tRule> + <tRule>$jamoMedial {l} s s → $L;</tRule> + <tRule>$jamoMedial {n} g g → $Nf;</tRule> + <tRule>$jamoMedial {n} j j → $Nf;</tRule> + + <comment># Finals: Attach consonant with preceding medial to preceding medial.</comment> + <comment># Do this BEFORE mapping consonants to initials. Longer keys must</comment> + <comment># precede shorter keys that they start with, e.g., the rule for 'bs'</comment> + <comment># must precede 'b'.</comment> + <comment># [BASIC Jamo-Latin FINALS handled here. 
Order irrelevant within this</comment> + <comment># block for Jamo-Latin.]</comment> + <tRule>$jamoMedial {bs} ↔ $BS;</tRule> + <tRule>$jamoMedial {b} ↔ $Bf;</tRule> + <tRule>$jamoMedial {ch} → $Cf;</tRule> + <tRule>$jamoMedial {c} ↔ $Cf;</tRule> + <tRule>$jamoMedial {d} ↔ $Df;</tRule> + <tRule>$jamoMedial {kk} ↔ $GGf;</tRule> + <tRule>$jamoMedial {gs} ↔ $GS;</tRule> + <tRule>$jamoMedial {g} ↔ $Gf;</tRule> + <tRule>$jamoMedial {h} ↔ $Hf;</tRule> + <tRule>$jamoMedial {j} ↔ $Jf;</tRule> + <tRule>$jamoMedial {k} ↔ $Kf;</tRule> + <tRule>$jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG;</tRule> + <tRule>$jamoMedial {lh} ↔ $LH;</tRule> + <tRule>$jamoMedial {lm} ↔ $LM;</tRule> + <tRule>$jamoMedial {lp} ↔ $LP;</tRule> + <tRule>$jamoMedial {ls} ↔ $LS;</tRule> + <tRule>$jamoMedial {lt} ↔ $LT;</tRule> + <tRule>$jamoMedial {l} ↔ $L;</tRule> + <tRule>$jamoMedial {m} ↔ $Mf;</tRule> + <tRule>$jamoMedial {ng} ↔ $NG;</tRule> + <tRule>$jamoMedial {nh} ↔ $NH;</tRule> + <tRule>$jamoMedial {nj} ↔ $NJ;</tRule> + <tRule>$jamoMedial {n} ↔ $Nf;</tRule> + <tRule>$jamoMedial {p} ↔ $Pf;</tRule> + <tRule>$jamoMedial {ss} ↔ $SSf;</tRule> + <tRule>$jamoMedial {s} ↔ $Sf;</tRule> + <tRule>$jamoMedial {t} ↔ $Tf;</tRule> + + <comment># Initials: Attach single consonant to following medial. Do this</comment> + <comment># AFTER mapping finals. Longer keys must precede shorter keys that</comment> + <comment># they start with, e.g., the rule for 'gg' must precede 'g'.</comment> + <comment># [BASIC Jamo-Latin INITIALS handled here. 
Order irrelevant within</comment> + <comment># this block for Jamo-Latin.]</comment> + <tRule>{kk} $latinMedial ↔ $KKi;</tRule> + <tRule>{g} $latinMedial ↔ $Gi;</tRule> + <tRule>{n} $latinMedial ↔ $Ni;</tRule> + <tRule>{tt} $latinMedial ↔ $TTi;</tRule> + <tRule>{d} $latinMedial ↔ $Di;</tRule> + <tRule>{l} $latinMedial ↔ $Li;</tRule> + <tRule>{m} $latinMedial ↔ $Mi;</tRule> + <tRule>{pp} $latinMedial ↔ $PPi;</tRule> + <tRule>{b} $latinMedial ↔ $Bi;</tRule> + <tRule>{ss} $latinMedial ↔ $SSi;</tRule> + <tRule>{s} $latinMedial ↔ $Si;</tRule> + <tRule>{jj} $latinMedial ↔ $JJi;</tRule> + <tRule>{j} $latinMedial ↔ $Ji;</tRule> + <tRule>{ch} $latinMedial ↔ $CHi;</tRule> + <tRule>{c} $latinMedial → $CHi;</tRule> + <tRule>{k} $latinMedial ↔ $Ki;</tRule> + <tRule>{t} $latinMedial ↔ $Ti;</tRule> + <tRule>{p} $latinMedial ↔ $Pi;</tRule> + <tRule>{h} $latinMedial ↔ $Hi;</tRule> + <comment># 'r' in final position. Because of the equivalency of the 'l' and</comment> + <comment># 'r' jamo (the glyphs are the same), we try to provide the same</comment> + <comment># equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled</comment> + <comment># below. If we see an 'r' in an apparent final position, treat it</comment> + <comment># like 'l'. For example, "karka" =→ Ki A R EU Ki A without this rule.</comment> + <comment># Instead, we want Ki A L Ki A.</comment> + + <comment># Initial + Final: If we match the next rule, we have initial then</comment> + <comment># final consonant with no intervening medial. We insert the null</comment> + <comment># vowel BEFORE it to create a well-formed syllable. (In the next rule</comment> + <comment># we insert a null vowel AFTER an anomalous initial.)</comment> + + + <comment># Initial + X: This block matches an initial consonant not followed by</comment> + <comment># a medial. We insert the null vowel after it. 
We handle double</comment> + <comment># initials explicitly here; for single initial consonants we insert EU</comment> + <comment># (as Latin) after them and let standard rules do the rest.</comment> + <comment># BREAKS ROUND TRIP INTEGRITY</comment> + + <tRule>kk → $KKi $EU;</tRule> + <tRule>tt → $TTi $EU;</tRule> + <tRule>pp → $PPi $EU;</tRule> + <tRule>ss → $SSi $EU;</tRule> + <tRule>jj → $JJi $EU;</tRule> + <tRule>([lbdghjkmnpst]) → | $1 eu;</tRule> + + <comment># X + Final: Finally we have to deal with a consonant that can only be</comment> + <comment># interpreted as a final (not an initial) and which is preceded</comment> + <comment># neither by an initial nor a medial. It is the start of the</comment> + <comment># syllable, but cannot be. Most of these will already be handled by</comment> + <comment># the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'</comment> + <comment># 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.</comment> + <comment># For this isolated case, we could add a null initial and medial,</comment> + <comment># which would give "la" =→ IEUNG EU L IEUNG A, for example. A more</comment> + <comment># economical solution is to transliterate isolated "l" (that is,</comment> + <comment># initial "l") to "r". (Other similar conversions of consonants that</comment> + <comment># occur neither as initials nor as finals are handled below.)</comment> + <tRule>l → | r;</tRule> + + <comment># Medials. If a medial is preceded by an initial, then we proceed</comment> + <comment># normally. As usual, longer keys must precede shorter ones.</comment> + <comment># [BASIC Jamo-Latin MEDIALS handled here. 
Order irrelevant within</comment> + <comment># this block for Jamo-Latin.]</comment> + <tRule>$jamoInitial {ae} ↔ $AE;</tRule> + <tRule>$jamoInitial {a} ↔ $A;</tRule> + <tRule>$jamoInitial {eo} ↔ $EO;</tRule> + <tRule>$jamoInitial {eu} ↔ $EU;</tRule> + <tRule>$jamoInitial {e} ↔ $E;</tRule> + <tRule>$jamoInitial {i} ↔ $I;</tRule> + <tRule>$jamoInitial {oe} ↔ $OE;</tRule> + <tRule>$jamoInitial {o} ↔ $O;</tRule> + <tRule>$jamoInitial {ui} ↔ $UI;</tRule> + <tRule>$jamoInitial {u} ↔ $U;</tRule> + <tRule>$jamoInitial {wae} ↔ $WAE;</tRule> + <tRule>$jamoInitial {wa} ↔ $WA;</tRule> + <tRule>$jamoInitial {wo} ↔ $WO;</tRule> + <tRule>$jamoInitial {we} ↔ $WE;</tRule> + <tRule>$jamoInitial {wi} ↔ $WI;</tRule> + <tRule>$jamoInitial {yae} ↔ $YAE;</tRule> + <tRule>$jamoInitial {ya} ↔ $YA;</tRule> + <tRule>$jamoInitial {yeo} ↔ $YEO;</tRule> + <tRule>$jamoInitial {ye} ↔ $YE;</tRule> + <tRule>$jamoInitial {yo} ↔ $YO;</tRule> + <tRule>$jamoInitial {yu} ↔ $YU;</tRule> + + <comment># We may see an anomalous isolated 'w' or 'y'. In that case, we</comment> + <comment># interpret it as 'wi' and 'yu', respectively.</comment> + <comment># BREAKS ROUND TRIP INTEGRITY</comment> + <tRule>$jamoInitial {w} → | wi;</tRule> + <tRule>$jamoInitial {y} → | yu;</tRule> + + <comment># Otherwise, insert a null consonant IEUNG before the medial (which is</comment> + <comment># still an untransliterated latin vowel).</comment> + <tRule>($latinMedial) → $IEUNG | $1;</tRule> + + <comment># Convert non-jamo latin consonants to equivalents. These occur as</comment> + <comment># neither initials nor finals in jamo. 'l' occurs as a final, but not</comment> + <comment># an initial; it is handled above. 
The following letters (left hand</comment> + <comment># side) will never be output by Jamo-Latin.</comment> + <tRule>f → | p;</tRule> + <tRule>q → | k;</tRule> + <tRule>v → | b;</tRule> + <tRule>x → | ks;</tRule> + <tRule>z → | s;</tRule> + <tRule>r → | l;</tRule> + <tRule>c → | k;</tRule> + + <comment># Delete separators (Latin-Jamo).</comment> + <tRule>$sep → ;</tRule> + + <comment># Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,</comment> + <comment># since these may also occur in text.</comment> + + <tRule>← $IEUNG;</tRule> + <comment>#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in</comment> + <comment>#- the INDEX file. This transliterator is, by itself, not</comment> + <comment>#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or</comment> + <comment>#- inverses thereof.</comment> + <comment># eof</comment> + </transform> + </transforms> +</supplementalData> + |