summaryrefslogtreecommitdiff
path: root/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml')
-rw-r--r--src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml489
1 files changed, 489 insertions, 0 deletions
diff --git a/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml b/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml
new file mode 100644
index 0000000..de3980c
--- /dev/null
+++ b/src/zope/i18n/locales/data/transforms/Latin-ConjoiningJamo.xml
@@ -0,0 +1,489 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd">
+<supplementalData>
+ <transforms>
+ <transform source="Latin" target="ConjoiningJamo" direction="both" visibility="internal">
+ <comment># Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303
+ </comment>
+ <comment>#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in</comment>
+ <comment>#- the INDEX file. This transliterator is, by itself, not</comment>
+ <comment>#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or</comment>
+ <comment>#- inverses thereof.</comment>
+ <comment># Transliteration from Latin characters to Korean script is done in</comment>
+ <comment># two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul</comment>
+ <comment># transliteration is done algorithmically following Unicode 3.0</comment>
+ <comment># section 3.11. This file implements the Latin to Jamo</comment>
+ <comment># transliteration using rules.</comment>
+ <comment># Jamo occupy the block 1100-11FF. Within this block there are three</comment>
+ <comment># groups of characters: initial consonants or choseong (I), medial</comment>
+ <comment># vowels or jungseong (M), and trailing consonants or jongseong (F).</comment>
+ <comment># Standard Korean syllables are of the form I+M+F*.</comment>
+ <comment># Section 3.11 describes the use of 'filler' jamo to convert</comment>
+ <comment># nonstandard syllables to standard form: the choseong filler 115F and</comment>
+ <comment># the junseong filler 1160. In this transliterator, we will not use</comment>
+ <comment># 115F or 1160.</comment>
+ <comment># We will, however, insert two 'null' jamo to make foreign words</comment>
+ <comment># conform to Korean syllable structure. These are the null initial</comment>
+ <comment># consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,</comment>
+ <comment># we will use the separator in order to disambiguate strings,</comment>
+ <comment># e.g. &quot;kan-ggan&quot; (initial GG) vs. &quot;kanggan&quot; (final NG + initial G).</comment>
+ <comment># We will not use all of the characters in the jamo block. We will</comment>
+ <comment># only use the 19 initials, 21 medials, and 27 finals possessing a</comment>
+ <comment># jamo short name as defined in section 4.4 of the Unicode book.</comment>
+ <comment># Rules of thumb. These guidelines provide the basic framework</comment>
+ <comment># for the rules. They are phrased in terms of Latin-Jamo transliteration.</comment>
+ <comment># The Jamo-Latin rules derive from these, since the Jamo-Latin rules are</comment>
+ <comment># just context-free transliteration of jamo to corresponding short names,</comment>
+ <comment># with the addition of separators to maintain round-trip integrity</comment>
+ <comment># in the context of the Latin-Jamo rules.</comment>
+ <comment># A sequence of vowels:</comment>
+ <comment># - Take the longest sequence you can. If there are too many, or you don't</comment>
+ <comment># have a starting consonant, introduce a 110B necessary.</comment>
+ <comment># A sequence of consonants.</comment>
+ <comment># - First join the double consonants: G + G -→ GG</comment>
+ <comment># - In the remaining list,</comment>
+ <comment># -- If there is no preceding vowel, take the first consonant, and insert EU</comment>
+ <comment># after it. Continue with the rest of the consonants.</comment>
+ <comment># -- If there is one consonant, attach to the following vowel</comment>
+ <comment># -- If there are two consonants and a following vowel, attach one to the</comment>
+ <comment># preceeding vowel, and one to the following vowel.</comment>
+ <comment># -- If there are more than two consonants, join the first two together if you</comment>
+ <comment># can: L + G =→ LG</comment>
+ <comment># -- If you still end up with more than 2 consonants, insert EU after the</comment>
+ <comment># first one, and continue with the rest of the consonants.</comment>
+ <comment>#----------------------------------------------------------------------</comment>
+ <comment># Variables</comment>
+ <comment># Some latin consonants or consonant pairs only occur as initials, and</comment>
+ <comment># some only as finals, but some occur as both. This makes some jamo</comment>
+ <comment># consonants ambiguous when transliterated into latin.</comment>
+
+ <comment># Initial only: IEUNG BB DD JJ R</comment>
+ <comment># Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ</comment>
+ <comment># Initial and Final: B C D G GG H J K M N P S SS T</comment>
+ <tRule>$Gi = ᄀ;</tRule>
+ <tRule>$KKi = ᄁ;</tRule>
+ <tRule>$Ni = ᄂ;</tRule>
+ <tRule>$Di = ᄃ;</tRule>
+ <tRule>$TTi = ᄄ;</tRule>
+ <tRule>$Li = ᄅ;</tRule>
+ <tRule>$Mi = ᄆ;</tRule>
+ <tRule>$Bi = ᄇ;</tRule>
+ <tRule>$PPi = ᄈ;</tRule>
+ <tRule>$Si = ᄉ;</tRule>
+ <tRule>$SSi = ᄊ;</tRule>
+ <tRule>$IEUNG = ᄋ; # null initial, inserted during Latin-Jamo</tRule>
+ <tRule>$Ji = ᄌ;</tRule>
+ <tRule>$JJi = ᄍ;</tRule>
+ <tRule>$CHi = ᄎ;</tRule>
+ <tRule>$Ki = ᄏ;</tRule>
+ <tRule>$Ti = ᄐ;</tRule>
+ <tRule>$Pi = ᄑ;</tRule>
+ <tRule>$Hi = ᄒ;</tRule>
+
+ <tRule>$A = ᅡ;</tRule>
+ <tRule>$AE = ᅢ;</tRule>
+ <tRule>$YA = ᅣ;</tRule>
+ <tRule>$YAE = ᅤ;</tRule>
+ <tRule>$EO = ᅥ;</tRule>
+ <tRule>$E = ᅦ;</tRule>
+ <tRule>$YEO = ᅧ;</tRule>
+ <tRule>$YE = ᅨ;</tRule>
+ <tRule>$O = ᅩ;</tRule>
+ <tRule>$WA = ᅪ;</tRule>
+ <tRule>$WAE = ᅫ;</tRule>
+ <tRule>$OE = ᅬ;</tRule>
+ <tRule>$YO = ᅭ;</tRule>
+ <tRule>$U = ᅮ;</tRule>
+ <tRule>$WO = ᅯ;</tRule>
+ <tRule>$WE = ᅰ;</tRule>
+ <tRule>$WI = ᅱ;</tRule>
+ <tRule>$YU = ᅲ;</tRule>
+ <tRule>$EU = ᅳ; # null medial, inserted during Latin-Jamo</tRule>
+ <tRule>$UI = ᅴ;</tRule>
+ <tRule>$I = ᅵ;</tRule>
+
+ <tRule>$Gf = ᆨ;</tRule>
+ <tRule>$GGf = ᆩ;</tRule>
+ <tRule>$GS = ᆪ;</tRule>
+ <tRule>$Nf = ᆫ;</tRule>
+ <tRule>$NJ = ᆬ;</tRule>
+ <tRule>$NH = ᆭ;</tRule>
+ <tRule>$Df = ᆮ;</tRule>
+ <tRule>$L = ᆯ;</tRule>
+ <tRule>$LG = ᆰ;</tRule>
+ <tRule>$LM = ᆱ;</tRule>
+ <tRule>$LB = ᆲ;</tRule>
+ <tRule>$LS = ᆳ;</tRule>
+ <tRule>$LT = ᆴ;</tRule>
+ <tRule>$LP = ᆵ;</tRule>
+ <tRule>$LH = ᆶ;</tRule>
+ <tRule>$Mf = ᆷ;</tRule>
+ <tRule>$Bf = ᆸ;</tRule>
+ <tRule>$BS = ᆹ;</tRule>
+ <tRule>$Sf = ᆺ;</tRule>
+ <tRule>$SSf = ᆻ;</tRule>
+ <tRule>$NG = ᆼ;</tRule>
+ <tRule>$Jf = ᆽ;</tRule>
+ <tRule>$Cf = ᆾ;</tRule>
+ <tRule>$Kf = ᆿ;</tRule>
+ <tRule>$Tf = ᇀ;</tRule>
+ <tRule>$Pf = ᇁ;</tRule>
+ <tRule>$Hf = ᇂ;</tRule>
+
+ <tRule>$jamoInitial = [ᄀ-ᄒ];</tRule>
+ <tRule>$jamoMedial = [ᅡ-ᅵ];</tRule>
+ <tRule>$latinInitial = [bcdghjklmnprst];</tRule>
+
+ <comment># Any character in the latin transliteration of a medial</comment>
+ <tRule>$latinMedial = [aeiouwy];</tRule>
+
+ <comment># The last character of the latin transliteration of a medial</comment>
+ <tRule>$latinMedialEnd = [aeiou];</tRule>
+
+ <comment># Disambiguation separator</comment>
+ <tRule>$sep = \-;</tRule>
+
+ <comment>#----------------------------------------------------------------------</comment>
+ <comment># Jamo-Latin</comment>
+ <comment># Jamo to latin is relatively simple, since it is the latin that is</comment>
+ <comment># ambiguous. Most rules are straightforward, and we encode them below</comment>
+ <comment># as simple add-on back rule, e.g.:</comment>
+ <comment># $jamoMedial {bs} → $BS;</comment>
+ <comment># becomes</comment>
+ <comment># $jamoMedial {bs} ↔ $BS;</comment>
+ <comment># Furthermore, we don't care about the ordering for Jamo-Latin because</comment>
+ <comment># we are going from single characters, so we can very easily piggyback</comment>
+ <comment># on the Latin-Jamo.</comment>
+ <comment># The main issue with Jamo-Latin is when to insert separators.</comment>
+ <comment># Separators are inserted to obtain correct round trip behavior. For</comment>
+ <comment># example, the sequence Ki A Gf Gi E, if transliterated to &quot;kagge&quot;,</comment>
+ <comment># would then round trip to Ki A GGi E. To prevent this, we insert a</comment>
+ <comment># separator: &quot;kag-ge&quot;. IMPORTANT: The need for separators depends</comment>
+ <comment># very specifically on the behavior of the Latin-Jamo rules. A change</comment>
+ <comment># in the Latin-Jamo behavior can completely change the way the</comment>
+ <comment># separator insertion must be done.</comment>
+
+ <comment># First try to preserve actual separators in the jamo text by doubling</comment>
+ <comment># them. This fixes problems like:</comment>
+ <comment># (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) =→ dajung-yeongyeol</comment>
+ <comment># =→ (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional</comment>
+ <comment># -- if we don't care about losing separators in the jamo, we can delete</comment>
+ <comment># this rule.</comment>
+ <tRule>$sep $sep ↔ $sep;</tRule>
+
+ <comment># Triple consonants. For three consonants &quot;axxx&quot; we insert a</comment>
+ <comment># separator between the first and second &quot;x&quot; if XXf, Xf, and Xi all</comment>
+ <comment># exist, and we have A Xf XXi. This prevents the reverse</comment>
+ <comment># transliteration to A XXf Xi.</comment>
+
+ <tRule>$sep ← $latinMedialEnd s {} $SSi;</tRule>
+
+ <comment># For vowels the rule is similar. If there is a vowel &quot;ae&quot; such that</comment>
+ <comment># &quot;a&quot; by itself and &quot;e&quot; by itself are vowels, then we want to map A E</comment>
+ <comment># to &quot;a-e&quot; so as not to round trip to AE. However, in the text Ki EO</comment>
+ <comment># IEUNG E we don't need to map to &quot;keo-e&quot;. &quot;keoe&quot; suffices. For</comment>
+ <comment># vowels of the form &quot;aei&quot;, both &quot;ae&quot; + &quot;i&quot; and &quot;a&quot; + &quot;ei&quot; must be</comment>
+ <comment># tested. NOTE: These rules used to have a left context of</comment>
+ <comment># $latinInitial instead of [^$latinMedial]. The problem with this is</comment>
+ <comment># sequences where an initial IEUNG is transliterated away:</comment>
+ <comment># (IEUNG)(A)(IEUNG)(EO) =→ aeo =→ (IEUNG)(AE)(IEUNG)(O)</comment>
+ <tRule>$sep ← [^$latinMedial] [y] e {} [$O $OE];</tRule>
+ <tRule>$sep ← [^$latinMedial] e {} [$U $UI];</tRule>
+ <tRule>$sep ← [^$latinMedial] [o a] {} [$E $EO $EU];</tRule>
+ <tRule>$sep ← [^$latinMedial] [w y] a {} [$E $EO $EU];</tRule>
+ <tRule>$sep ← [^$latinMedial] [u] {} [$I];</tRule>
+
+ <comment># Similar to the above, but with an intervening $IEUNG.</comment>
+
+ <tRule>$sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE];</tRule>
+ <tRule>$sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U];</tRule>
+
+ <tRule>$sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];</tRule>
+ <tRule>$sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];</tRule>
+
+ <comment># Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,</comment>
+ <comment># where Xi also exists, must be transliterated as &quot;ax-e&quot; to prevent</comment>
+ <comment># the round trip conversion to A Xi E.</comment>
+ <tRule>$sep ← $latinMedialEnd b {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd d {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd g {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd h {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd j {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd k {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd m {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd n {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd p {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd s {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd t {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l {} $IEUNG $jamoMedial;</tRule>
+
+ <comment># Double finals followed by IEUNG. Similar to the single finals</comment>
+ <comment># followed by IEUNG. Any latin consonant pair X Y, between medials,</comment>
+ <comment># that we would split by Latin-Jamo, we must handle when it occurs as</comment>
+ <comment># part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E</comment>
+ <tRule>$sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial;</tRule>
+
+ <comment># Split doubles. Text of the form A Xi Xf E, where XXi also occurs,</comment>
+ <comment># we transliterate as &quot;ax-xe&quot; to prevent round trip transliteration as</comment>
+ <comment># A XXi E.</comment>
+
+ <tRule>$sep ← $latinMedialEnd j {} $Ji $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd k {} $Ki $jamoMedial;</tRule>
+ <tRule>$sep ← $latinMedialEnd s {} $Si $jamoMedial;</tRule>
+
+ <comment># XYY. This corresponds to the XYY rule in Latin-Jamo. By default</comment>
+ <comment># Latin-Jamo maps &quot;xyy&quot; to Xf YYi, to keep YY together. As a result,</comment>
+ <comment># &quot;xyy&quot; forms that correspond to XYf Yi must be transliterated as</comment>
+ <comment># &quot;xy-y&quot;.</comment>
+ <tRule>$sep ← $latinMedialEnd b s {} [$Si $SSi];</tRule>
+ <tRule>$sep ← $latinMedialEnd g s {} [$Si $SSi];</tRule>
+ <tRule>$sep ← $latinMedialEnd l b {} [$Bi];</tRule>
+ <tRule>$sep ← $latinMedialEnd l g {} [$Gi];</tRule>
+ <tRule>$sep ← $latinMedialEnd l s {} [$Si $SSi];</tRule>
+ <tRule>$sep ← $latinMedialEnd n g {} [$Gi];</tRule>
+ <tRule>$sep ← $latinMedialEnd n j {} [$Ji $JJi];</tRule>
+
+ <tRule>$sep ← $latinMedialEnd l {} [$PPi];</tRule>
+ <tRule>$sep ← $latinMedialEnd l {} [$TTi];</tRule>
+ <tRule>$sep ← $latinMedialEnd k {} [$KKi $Ki];</tRule>
+ <tRule>$sep ← $latinMedialEnd p {} $Pi;</tRule>
+ <tRule>$sep ← $latinMedialEnd t {} $Ti;</tRule>
+ <tRule>$sep ← $latinMedialEnd c {} [$Hi];</tRule>
+
+ <comment># Deletion of IEUNG is handled below.</comment>
+ <comment>#----------------------------------------------------------------------</comment>
+ <comment># Latin-Jamo</comment>
+ <comment># [Basic, context-free Jamo-Latin rules are embedded here too. See</comment>
+ <comment># above.]</comment>
+ <comment># Split digraphs: Text of the form 'axye', where 'xy' is a final</comment>
+ <comment># digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and</comment>
+ <comment># 'e' are medials, we want to transliterate this as A Xf Yi E rather</comment>
+ <comment># than A XYf IEUNG E. We do NOT include text of the form &quot;axxe&quot;,</comment>
+ <comment># since that is handled differently below. These rules are generated</comment>
+ <comment># programmatically from the jamo data.</comment>
+ <tRule>$jamoMedial {b s} $latinMedial → $Bf $Si;</tRule>
+ <tRule>$jamoMedial {g s} $latinMedial → $Gf $Si;</tRule>
+ <tRule>$jamoMedial {l b} $latinMedial → $L $Bi;</tRule>
+ <tRule>$jamoMedial {l g} $latinMedial → $L $Gi;</tRule>
+ <tRule>$jamoMedial {l h} $latinMedial → $L $Hi;</tRule>
+ <tRule>$jamoMedial {l m} $latinMedial → $L $Mi;</tRule>
+ <tRule>$jamoMedial {l p} $latinMedial → $L $Pi;</tRule>
+ <tRule>$jamoMedial {l s} $latinMedial → $L $Si;</tRule>
+ <tRule>$jamoMedial {l t} $latinMedial → $L $Ti;</tRule>
+ <tRule>$jamoMedial {n g} $latinMedial → $Nf $Gi;</tRule>
+ <tRule>$jamoMedial {n h} $latinMedial → $Nf $Hi;</tRule>
+ <tRule>$jamoMedial {n j} $latinMedial → $Nf $Ji;</tRule>
+
+ <comment># Single consonants are initials: Text of the form 'axe', where 'x'</comment>
+ <comment># can be an initial or a final, and 'a' and 'e' are medials, we want</comment>
+ <comment># to transliterate as A Xi E rather than A Xf IEUNG E.</comment>
+ <tRule>$jamoMedial {b} $latinMedial → $Bi;</tRule>
+ <tRule>$jamoMedial {ch} $latinMedial → $CHi;</tRule>
+ <tRule>$jamoMedial {d} $latinMedial → $Di;</tRule>
+ <tRule>$jamoMedial {g} $latinMedial → $Gi;</tRule>
+ <tRule>$jamoMedial {h} $latinMedial → $Hi;</tRule>
+ <tRule>$jamoMedial {j} $latinMedial → $Ji;</tRule>
+ <tRule>$jamoMedial {k} $latinMedial → $Ki;</tRule>
+ <tRule>$jamoMedial {m} $latinMedial → $Mi;</tRule>
+ <tRule>$jamoMedial {n} $latinMedial → $Ni;</tRule>
+ <tRule>$jamoMedial {p} $latinMedial → $Pi;</tRule>
+ <tRule>$jamoMedial {s} $latinMedial → $Si;</tRule>
+ <tRule>$jamoMedial {t} $latinMedial → $Ti;</tRule>
+ <tRule>$jamoMedial {l} $latinMedial → $Li;</tRule>
+
+ <comment># Doubled initials. The sequence &quot;axxe&quot;, where XX exists as an initial</comment>
+ <comment># (XXi), and also Xi and Xf exist (true of all digraphs XX), we want</comment>
+ <comment># to transliterate as A XXi E, rather than split to A Xf Xi E.</comment>
+ <tRule>$jamoMedial {pp} $latinMedial → $PPi;</tRule>
+ <tRule>$jamoMedial {t t} $latinMedial → $TTi;</tRule>
+ <tRule>$jamoMedial {j j} $latinMedial → $JJi;</tRule>
+ <tRule>$jamoMedial {k k} $latinMedial → $KKi;</tRule>
+ <tRule>$jamoMedial {s s} $latinMedial → $SSi;</tRule>
+
+ <comment># XYY. Because doubled consonants bind more strongly than XY</comment>
+ <comment># consonants, we must handle the sequence &quot;axyy&quot; specially. Here XYf</comment>
+ <comment># and YYi must exist. In these cases, we map to Xf YYi rather than</comment>
+ <comment># XYf.</comment>
+ <tRule>$jamoMedial {b} s s → $Bf;</tRule>
+ <tRule>$jamoMedial {g} s s → $Gf;</tRule>
+ <tRule>$jamoMedial {l} b b → $L;</tRule>
+ <tRule>$jamoMedial {l} g g → $L;</tRule>
+ <tRule>$jamoMedial {l} s s → $L;</tRule>
+ <tRule>$jamoMedial {n} g g → $Nf;</tRule>
+ <tRule>$jamoMedial {n} j j → $Nf;</tRule>
+
+ <comment># Finals: Attach consonant with preceding medial to preceding medial.</comment>
+ <comment># Do this BEFORE mapping consonants to initials. Longer keys must</comment>
+ <comment># precede shorter keys that they start with, e.g., the rule for 'bs'</comment>
+ <comment># must precede 'b'.</comment>
+ <comment># [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this</comment>
+ <comment># block for Jamo-Latin.]</comment>
+ <tRule>$jamoMedial {bs} ↔ $BS;</tRule>
+ <tRule>$jamoMedial {b} ↔ $Bf;</tRule>
+ <tRule>$jamoMedial {ch} → $Cf;</tRule>
+ <tRule>$jamoMedial {c} ↔ $Cf;</tRule>
+ <tRule>$jamoMedial {d} ↔ $Df;</tRule>
+ <tRule>$jamoMedial {kk} ↔ $GGf;</tRule>
+ <tRule>$jamoMedial {gs} ↔ $GS;</tRule>
+ <tRule>$jamoMedial {g} ↔ $Gf;</tRule>
+ <tRule>$jamoMedial {h} ↔ $Hf;</tRule>
+ <tRule>$jamoMedial {j} ↔ $Jf;</tRule>
+ <tRule>$jamoMedial {k} ↔ $Kf;</tRule>
+ <tRule>$jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG;</tRule>
+ <tRule>$jamoMedial {lh} ↔ $LH;</tRule>
+ <tRule>$jamoMedial {lm} ↔ $LM;</tRule>
+ <tRule>$jamoMedial {lp} ↔ $LP;</tRule>
+ <tRule>$jamoMedial {ls} ↔ $LS;</tRule>
+ <tRule>$jamoMedial {lt} ↔ $LT;</tRule>
+ <tRule>$jamoMedial {l} ↔ $L;</tRule>
+ <tRule>$jamoMedial {m} ↔ $Mf;</tRule>
+ <tRule>$jamoMedial {ng} ↔ $NG;</tRule>
+ <tRule>$jamoMedial {nh} ↔ $NH;</tRule>
+ <tRule>$jamoMedial {nj} ↔ $NJ;</tRule>
+ <tRule>$jamoMedial {n} ↔ $Nf;</tRule>
+ <tRule>$jamoMedial {p} ↔ $Pf;</tRule>
+ <tRule>$jamoMedial {ss} ↔ $SSf;</tRule>
+ <tRule>$jamoMedial {s} ↔ $Sf;</tRule>
+ <tRule>$jamoMedial {t} ↔ $Tf;</tRule>
+
+ <comment># Initials: Attach single consonant to following medial. Do this</comment>
+ <comment># AFTER mapping finals. Longer keys must precede shorter keys that</comment>
+ <comment># they start with, e.g., the rule for 'gg' must precede 'g'.</comment>
+ <comment># [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within</comment>
+ <comment># this block for Jamo-Latin.]</comment>
+ <tRule>{kk} $latinMedial ↔ $KKi;</tRule>
+ <tRule>{g} $latinMedial ↔ $Gi;</tRule>
+ <tRule>{n} $latinMedial ↔ $Ni;</tRule>
+ <tRule>{tt} $latinMedial ↔ $TTi;</tRule>
+ <tRule>{d} $latinMedial ↔ $Di;</tRule>
+ <tRule>{l} $latinMedial ↔ $Li;</tRule>
+ <tRule>{m} $latinMedial ↔ $Mi;</tRule>
+ <tRule>{pp} $latinMedial ↔ $PPi;</tRule>
+ <tRule>{b} $latinMedial ↔ $Bi;</tRule>
+ <tRule>{ss} $latinMedial ↔ $SSi;</tRule>
+ <tRule>{s} $latinMedial ↔ $Si;</tRule>
+ <tRule>{jj} $latinMedial ↔ $JJi;</tRule>
+ <tRule>{j} $latinMedial ↔ $Ji;</tRule>
+ <tRule>{ch} $latinMedial ↔ $CHi;</tRule>
+ <tRule>{c} $latinMedial → $CHi;</tRule>
+ <tRule>{k} $latinMedial ↔ $Ki;</tRule>
+ <tRule>{t} $latinMedial ↔ $Ti;</tRule>
+ <tRule>{p} $latinMedial ↔ $Pi;</tRule>
+ <tRule>{h} $latinMedial ↔ $Hi;</tRule>
+ <comment># 'r' in final position. Because of the equivalency of the 'l' and</comment>
+ <comment># 'r' jamo (the glyphs are the same), we try to provide the same</comment>
+ <comment># equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled</comment>
+ <comment># below. If we see an 'r' in an apparent final position, treat it</comment>
+ <comment># like 'l'. For example, &quot;karka&quot; =→ Ki A R EU Ki A without this rule.</comment>
+ <comment># Instead, we want Ki A L Ki A.</comment>
+
+ <comment># Initial + Final: If we match the next rule, we have initial then</comment>
+ <comment># final consonant with no intervening medial. We insert the null</comment>
+ <comment># vowel BEFORE it to create a well-formed syllable. (In the next rule</comment>
+ <comment># we insert a null vowel AFTER an anomalous initial.)</comment>
+
+
+ <comment># Initial + X: This block matches an initial consonant not followed by</comment>
+ <comment># a medial. We insert the null vowel after it. We handle double</comment>
+ <comment># initials explicitly here; for single initial consonants we insert EU</comment>
+ <comment># (as Latin) after them and let standard rules do the rest.</comment>
+ <comment># BREAKS ROUND TRIP INTEGRITY</comment>
+
+ <tRule>kk → $KKi $EU;</tRule>
+ <tRule>tt → $TTi $EU;</tRule>
+ <tRule>pp → $PPi $EU;</tRule>
+ <tRule>ss → $SSi $EU;</tRule>
+ <tRule>jj → $JJi $EU;</tRule>
+ <tRule>([lbdghjkmnpst]) → | $1 eu;</tRule>
+
+ <comment># X + Final: Finally we have to deal with a consonant that can only be</comment>
+ <comment># interpreted as a final (not an initial) and which is preceded</comment>
+ <comment># neither by an initial nor a medial. It is the start of the</comment>
+ <comment># syllable, but cannot be. Most of these will already be handled by</comment>
+ <comment># the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'</comment>
+ <comment># 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.</comment>
+ <comment># For this isolated case, we could add a null initial and medial,</comment>
+ <comment># which would give &quot;la&quot; =→ IEUNG EU L IEUNG A, for example. A more</comment>
+ <comment># economical solution is to transliterate isolated &quot;l&quot; (that is,</comment>
+ <comment># initial &quot;l&quot;) to &quot;r&quot;. (Other similar conversions of consonants that</comment>
+ <comment># occur neither as initials nor as finals are handled below.)</comment>
+ <tRule>l → | r;</tRule>
+
+ <comment># Medials. If a medial is preceded by an initial, then we proceed</comment>
+ <comment># normally. As usual, longer keys must precede shorter ones.</comment>
+ <comment># [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within</comment>
+ <comment># this block for Jamo-Latin.]</comment>
+ <tRule>$jamoInitial {ae} ↔ $AE;</tRule>
+ <tRule>$jamoInitial {a} ↔ $A;</tRule>
+ <tRule>$jamoInitial {eo} ↔ $EO;</tRule>
+ <tRule>$jamoInitial {eu} ↔ $EU;</tRule>
+ <tRule>$jamoInitial {e} ↔ $E;</tRule>
+ <tRule>$jamoInitial {i} ↔ $I;</tRule>
+ <tRule>$jamoInitial {oe} ↔ $OE;</tRule>
+ <tRule>$jamoInitial {o} ↔ $O;</tRule>
+ <tRule>$jamoInitial {ui} ↔ $UI;</tRule>
+ <tRule>$jamoInitial {u} ↔ $U;</tRule>
+ <tRule>$jamoInitial {wae} ↔ $WAE;</tRule>
+ <tRule>$jamoInitial {wa} ↔ $WA;</tRule>
+ <tRule>$jamoInitial {wo} ↔ $WO;</tRule>
+ <tRule>$jamoInitial {we} ↔ $WE;</tRule>
+ <tRule>$jamoInitial {wi} ↔ $WI;</tRule>
+ <tRule>$jamoInitial {yae} ↔ $YAE;</tRule>
+ <tRule>$jamoInitial {ya} ↔ $YA;</tRule>
+ <tRule>$jamoInitial {yeo} ↔ $YEO;</tRule>
+ <tRule>$jamoInitial {ye} ↔ $YE;</tRule>
+ <tRule>$jamoInitial {yo} ↔ $YO;</tRule>
+ <tRule>$jamoInitial {yu} ↔ $YU;</tRule>
+
+ <comment># We may see an anomalous isolated 'w' or 'y'. In that case, we</comment>
+ <comment># interpret it as 'wi' and 'yu', respectively.</comment>
+ <comment># BREAKS ROUND TRIP INTEGRITY</comment>
+ <tRule>$jamoInitial {w} → | wi;</tRule>
+ <tRule>$jamoInitial {y} → | yu;</tRule>
+
+ <comment># Otherwise, insert a null consonant IEUNG before the medial (which is</comment>
+ <comment># still an untransliterated latin vowel).</comment>
+ <tRule>($latinMedial) → $IEUNG | $1;</tRule>
+
+ <comment># Convert non-jamo latin consonants to equivalents. These occur as</comment>
+ <comment># neither initials nor finals in jamo. 'l' occurs as a final, but not</comment>
+ <comment># an initial; it is handled above. The following letters (left hand</comment>
+ <comment># side) will never be output by Jamo-Latin.</comment>
+ <tRule>f → | p;</tRule>
+ <tRule>q → | k;</tRule>
+ <tRule>v → | b;</tRule>
+ <tRule>x → | ks;</tRule>
+ <tRule>z → | s;</tRule>
+ <tRule>r → | l;</tRule>
+ <tRule>c → | k;</tRule>
+
+ <comment># Delete separators (Latin-Jamo).</comment>
+ <tRule>$sep → ;</tRule>
+
+ <comment># Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,</comment>
+ <comment># since these may also occur in text.</comment>
+
+ <tRule>← $IEUNG;</tRule>
+ <comment>#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in</comment>
+ <comment>#- the INDEX file. This transliterator is, by itself, not</comment>
+ <comment>#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or</comment>
+ <comment>#- inverses thereof.</comment>
+ <comment># eof</comment>
+ </transform>
+ </transforms>
+</supplementalData>
+