summaryrefslogtreecommitdiff
path: root/src/zope/i18n/locales/data/transforms/Bulgarian-Latin-BGN.xml
blob: 2fed93aba73bb16db0e0870085bd840bdce9f819 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "http://www.unicode.org/cldr/dtd/1.5/ldmlSupplemental.dtd">
<supplementalData>
	<transforms>
		<transform source="Bulgarian" target="Latin" direction="forward" variant="BGN" draft="provisional">
			<comment>
			########################################################################
			# BGN/PCGN 1952 System
			#
			# This system was adopted by the BGN in 1949 and by the PCGN in 1952.
			# It reflects the much simplified Bulgarian orthography as officially
			# revised in February 1945. The Bulgarian alphabet contains all of
			# the characters present in the Russian alphabet with the exception
			# of Ёё, Ыы, and Ээ. Two obsolete letters Ѫѫ and Ѣѣ are also given.
			#
			# The Bulgarian Alphabet as defined by the BGN (Page 15):
			#
			#   АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢ
			#   абвгдежзийклмнопрстуфхцчшщъьюяѫѣ
			#
			# Prepared by Michael Everson &lt;everson@evertype.com&gt;
			########################################################################
			#
			# MINIMAL FILTER: Bulgarian-Latin
			#
			</comment>
			<!-- Filter rule (see the MINIMAL FILTER heading above): the set lists the same upper- and lower-case letters as the alphabet given in the header comment. -->
			<tRule>:: [АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢабвгдежзийклмнопрстуфхцчшщъьюяѫѣ] ;</tRule>
			<!-- NOTE(review): this appears to request NFD normalization in the forward direction (NFC for the inverse); confirm against the ICU rule syntax. If Й/й are decomposed here before the letter rules run, verify that the Й/й rules further down still match. -->
			<tRule>:: NFD (NFC) ;</tRule>
			<comment>
			#
			########################################################################
			</comment>

			<comment>
			########################################################################
			#
			# Define All Transformation Variables
			#
			########################################################################
			#
			</comment>
			<comment>
			# Upper-case letter classes.
			</comment>
			<tRule>$upperVowels = [АЕИОУЪЮЯѪѢ] ;</tRule>
			<tRule>$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ;</tRule>
			<comment>
			# Lower-case letter classes.
			</comment>
			<tRule>$lowerVowels = [аеиоуъюяѫѣ] ;</tRule>
			<tRule>$lowerConsonants = [бвгджзйклмнпрстфхцчшщь] ;</tRule>
			<comment>
			# Unions of the classes above: by kind, by case, and all letters.
			</comment>
			<tRule>$vowels = [$upperVowels $lowerVowels] ;</tRule>
			<tRule>$consonants = [$upperConsonants $lowerConsonants] ;</tRule>
			<tRule>$lower = [$lowerVowels $lowerConsonants] ;</tRule>
			<tRule>$bulgarian = [$consonants $vowels] ;</tRule>
			<comment>
			#
			# Use this $wordBoundary (any character that is not a letter, mark or
			# number) until bug 2034 is fixed in ICU:
			# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
			#
			</comment>
			<tRule>$wordBoundary = [^[:L:][:M:][:N:]] ;</tRule>
			<comment>
			#
			########################################################################
			</comment>

			<comment>
			########################################################################
			#
			# Start of Alphabetic Transformations
			#
			########################################################################
			#
			</comment>
			<tRule>А → A ; # CYRILLIC CAPITAL LETTER A</tRule>
			<tRule>а → a ; # CYRILLIC SMALL LETTER A</tRule>
			<tRule>Б → B ; # CYRILLIC CAPITAL LETTER BE</tRule>
			<tRule>б → b ; # CYRILLIC SMALL LETTER BE</tRule>
			<tRule>В → V ; # CYRILLIC CAPITAL LETTER VE</tRule>
			<tRule>в → v ; # CYRILLIC SMALL LETTER VE</tRule>
			<tRule>Г → G ; # CYRILLIC CAPITAL LETTER GHE</tRule>
			<tRule>г → g ; # CYRILLIC SMALL LETTER GHE</tRule>
			<tRule>Д → D ; # CYRILLIC CAPITAL LETTER DE</tRule>
			<tRule>д → d ; # CYRILLIC SMALL LETTER DE</tRule>
			<tRule>Е → E ; # CYRILLIC CAPITAL LETTER IE</tRule>
			<tRule>е → e ; # CYRILLIC SMALL LETTER IE</tRule>
			<comment>
			# A capital that romanizes to a digraph is written in title case (Zh)
			# when a lower-case Bulgarian letter follows, and in all capitals (ZH)
			# otherwise.
			</comment>
			<tRule>Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
			<tRule>Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
			<tRule>ж → zh ; # CYRILLIC SMALL LETTER ZHE</tRule>
			<tRule>З → Z ; # CYRILLIC CAPITAL LETTER ZE</tRule>
			<tRule>з → z ; # CYRILLIC SMALL LETTER ZE</tRule>
			<tRule>И → I ; # CYRILLIC CAPITAL LETTER I</tRule>
			<tRule>и → i ; # CYRILLIC SMALL LETTER I</tRule>
			<tRule>Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I</tRule>
			<tRule>й → y ; # CYRILLIC SMALL LETTER SHORT I</tRule>
			<tRule>К → K ; # CYRILLIC CAPITAL LETTER KA</tRule>
			<tRule>к → k ; # CYRILLIC SMALL LETTER KA</tRule>
			<tRule>Л → L ; # CYRILLIC CAPITAL LETTER EL</tRule>
			<tRule>л → l ; # CYRILLIC SMALL LETTER EL</tRule>
			<tRule>М → M ; # CYRILLIC CAPITAL LETTER EM</tRule>
			<tRule>м → m ; # CYRILLIC SMALL LETTER EM</tRule>
			<tRule>Н → N ; # CYRILLIC CAPITAL LETTER EN</tRule>
			<tRule>н → n ; # CYRILLIC SMALL LETTER EN</tRule>
			<tRule>О → O ; # CYRILLIC CAPITAL LETTER O</tRule>
			<tRule>о → o ; # CYRILLIC SMALL LETTER O</tRule>
			<tRule>П → P ; # CYRILLIC CAPITAL LETTER PE</tRule>
			<tRule>п → p ; # CYRILLIC SMALL LETTER PE</tRule>
			<tRule>Р → R ; # CYRILLIC CAPITAL LETTER ER</tRule>
			<tRule>р → r ; # CYRILLIC SMALL LETTER ER</tRule>
			<tRule>С → S ; # CYRILLIC CAPITAL LETTER ES</tRule>
			<tRule>с → s ; # CYRILLIC SMALL LETTER ES</tRule>
			<comment>
			#
			########################################################################
			#
			# BGN Page 16 Note 4
			#
			# тс becomes t·s
			#
			########################################################################
			#
			</comment>
			<!-- The two-letter ТС/Тс/тс rules are listed ahead of the single-letter Т/т rules. NOTE(review): presumably so that the two-letter sequence is tried first; confirm against ICU rule-ordering semantics before reordering. -->
			<tRule>ТС → T·S ; # CYRILLIC CAPITAL LETTER TE</tRule>
			<tRule>Тс → T·s ; # CYRILLIC CAPITAL LETTER TE</tRule>
			<tRule>тс → t·s ; # CYRILLIC SMALL LETTER TE</tRule>
			<tRule>Т → T ; # CYRILLIC CAPITAL LETTER TE</tRule>
			<tRule>т → t ; # CYRILLIC SMALL LETTER TE</tRule>
			<comment>
			#
			########################################################################
			#
			# End Note 4
			#
			########################################################################
			</comment>

			<tRule>У → U ; # CYRILLIC CAPITAL LETTER U</tRule>
			<tRule>у → u ; # CYRILLIC SMALL LETTER U</tRule>
			<tRule>Ф → F ; # CYRILLIC CAPITAL LETTER EF</tRule>
			<tRule>ф → f ; # CYRILLIC SMALL LETTER EF</tRule>
			<!-- Х, Ц and Ч each have a pair of capital rules: title case (Kh, Ts, Ch) when a letter from $lower follows, all capitals (KH, TS, CH) otherwise. -->
			<tRule>Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA</tRule>
			<tRule>Х → KH ; # CYRILLIC CAPITAL LETTER HA</tRule>
			<tRule>х → kh ; # CYRILLIC SMALL LETTER HA</tRule>
			<tRule>Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE</tRule>
			<tRule>Ц → TS ; # CYRILLIC CAPITAL LETTER TSE</tRule>
			<tRule>ц → ts ; # CYRILLIC SMALL LETTER TSE</tRule>
			<tRule>Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE</tRule>
			<tRule>Ч → CH ; # CYRILLIC CAPITAL LETTER CHE</tRule>
			<tRule>ч → ch ; # CYRILLIC SMALL LETTER CHE</tRule>

			<comment>
			########################################################################
			#
			# Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6). 
			#
			# шт becomes sh·t
			#
			########################################################################
			#
			</comment>
			<!-- The sequence шт is written with a middle dot (sh·t), so its output differs from that of the single letter щ (sht). The two-letter ШТ/Шт/шт rules are listed ahead of the single-letter Ш/ш rules. -->
			<tRule>ШТ → SH·T ; # CYRILLIC CAPITAL LETTER SHA</tRule>
			<tRule>Шт → Sh·t ; # CYRILLIC CAPITAL LETTER SHA</tRule>
			<tRule>шт → sh·t ; # CYRILLIC SMALL LETTER SHA</tRule>
			<!-- Capital Ш and Щ: title case (Sh, Sht) when a letter from $lower follows, all capitals (SH, SHT) otherwise. -->
			<tRule>Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA</tRule>
			<tRule>Ш → SH ; # CYRILLIC CAPITAL LETTER SHA</tRule>
			<tRule>ш → sh ; # CYRILLIC SMALL LETTER SHA</tRule>
			<tRule>Щ} $lower → Sht ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
			<tRule>Щ → SHT ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
			<tRule>щ → sht ; # CYRILLIC SMALL LETTER SHCHA</tRule>
			<comment>
			#
			########################################################################
			#
			# End Implied rule
			#
			########################################################################
			</comment>

			<comment>
			########################################################################
			#
			# BGN Page 16 Note 1
			#
			# In modern Bulgarian orthography, the character ъ does not occur in
			# word-final position.  It should be omitted in romanization when found
			# in older sources.
			#
			# The following rule removes all Ъъ at the end of a word. It is assumed
			# that when the condition is met, the text must be from an older source.
			# Comment out with a '#' at the start of a line to disable.
			#
			# The rule is listed ahead of the general Ъ/ъ rules because rules are
			# tried in order: placed after them it could never match. The context
			# before Ъ/ъ is written as "any letter or mark" rather than $bulgarian
			# because the text before the current position has already been
			# romanized by the time Ъ/ъ is reached, so it no longer contains
			# Bulgarian letters.
			#
			# NOTE(review): $wordBoundary needs an actual character after Ъ/ъ, so
			# presumably a Ъ/ъ that is the very last character of the input is
			# still romanized (see the ICU bug referenced where $wordBoundary is
			# defined); confirm.
			#
			########################################################################
			#
			</comment>
			<tRule>[[:L:][:M:]] { [Ъъ] } $wordBoundary → ;</tRule>
			<comment>
			#
			########################################################################
			#
			# End BGN Page 16 Note 1
			#
			########################################################################
			</comment>

			<tRule>Ъ → Ŭ ; # CYRILLIC CAPITAL LETTER HARD SIGN</tRule>
			<tRule>ъ → ŭ ; # CYRILLIC SMALL LETTER HARD SIGN</tRule>

			<!-- Both cases of the soft sign map to the same output character. -->
			<tRule>Ь → ’ ; # CYRILLIC CAPITAL LETTER SOFT SIGN</tRule>
			<tRule>ь → ’ ; # CYRILLIC SMALL LETTER SOFT SIGN</tRule>
			<!-- Capital Ю and Я: title case (Yu, Ya) when a letter from $lower follows, all capitals (YU, YA) otherwise. -->
			<tRule>Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU</tRule>
			<tRule>Ю → YU ; # CYRILLIC CAPITAL LETTER YU</tRule>
			<tRule>ю → yu ; # CYRILLIC SMALL LETTER YU</tRule>
			<tRule>Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA</tRule>
			<tRule>Я → YA ; # CYRILLIC CAPITAL LETTER YA</tRule>
			<tRule>я → ya ; # CYRILLIC SMALL LETTER YA</tRule>

			<comment>
			########################################################################
			#
			# BGN Page 16 Note 2 
			#
			# The obsolete character Ѫ, which was replaced by Ъ in 1945, should be 
			# romanized Ŭ.
			#
			########################################################################
			#
			</comment>
			<!-- Big yus produces the same output letters (Ŭ/ŭ) as the hard sign rules. -->
			<tRule>Ѫ → Ŭ ; # CYRILLIC CAPITAL LETTER BIG YUS</tRule>
			<tRule>ѫ → ŭ ; # CYRILLIC SMALL LETTER BIG YUS</tRule>
			<comment>
			#
			########################################################################
			#
			# End BGN Page 16 Note 2 
			#
			########################################################################
			</comment>

			<comment>
			########################################################################
			#
			# BGN Page 16 Note 3 
			#
			# The obsolete character Ѣ, replaced in 1945 by Я or Е according to local
			# pronunciation, should be romanized as e or ya, accordingly, if the
			# pronunciation is known; otherwise as ye.
			#
			########################################################################
			#
			</comment>
			<!-- Default romanization of yat as "ye": capital Ѣ gives title case (Ye) when a letter from $lower follows, all capitals (YE) otherwise. -->
			<tRule>Ѣ} $lower → Ye ; # CYRILLIC CAPITAL LETTER YAT</tRule>
			<tRule>Ѣ → YE ; # CYRILLIC CAPITAL LETTER YAT</tRule>
			<tRule>ѣ → ye ; # CYRILLIC SMALL LETTER YAT</tRule>
			<comment>
			#
			# Alternative rules where appropriate for local pronunciation. To apply,
			# uncomment the following lines by removing the '#' mark at the start of
			# each line and insert them before the three rule lines above.
			#
			# Ѣ} $lower → E ; # CYRILLIC CAPITAL LETTER YAT
			# Ѣ → E ; # CYRILLIC CAPITAL LETTER YAT
			# ѣ → e ; # CYRILLIC SMALL LETTER YAT
			#
			########################################################################
			#
			# End BGN Page 16 Note 3 
			#
			########################################################################
			</comment>
		</transform>
	</transforms>
</supplementalData>