1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
|
;;; ucs-normalize-tests.el --- tests for international/ucs-normalize.el -*- lexical-binding: t -*-
;; Copyright (C) 2002-2021 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; The Part1 test takes a long time because it goes over the whole
;; unicode character set; you should build Emacs with optimization
;; enabled before running it.
;;
;; If there are lines marked as failing (see
;; `ucs-normalize-tests--failing-lines-part1' and
;; `ucs-normalize-tests--failing-lines-part2'), they may need to be
;; adjusted when NormalizationTest.txt is updated. Run the function
;; `ucs-normalize-check-failing-lines' to see what changes are needed.
;;; Code:
(eval-when-compile (require 'cl-lib))
(require 'seq)
(require 'ert)
(require 'ucs-normalize)
(defconst ucs-normalize-test-data-file
(expand-file-name "admin/unidata/NormalizationTest.txt" source-directory))
(defun ucs-normalize-tests--parse-column ()
(let ((chars nil)
(term nil))
(while (and (not (eq term ?\;))
(looking-at "\\([[:xdigit:]]\\{4,6\\}\\)\\([; ]\\)"))
(let ((code-point (match-string-no-properties 1)))
(setq term (char-after (match-beginning 2)))
(goto-char (match-end 0))
(push (string-to-number code-point 16) chars)))
(apply #'string (nreverse chars))))
(defconst ucs-normalize-tests--norm-buf (generate-new-buffer " *ucs-normalizing-buffer*"))
(defmacro ucs-normalize-tests--normalization-equal-p (norm str equal-to)
"Like `ucs-normalize-string' but reuse current buffer for efficiency.
And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(let ((norm-alist '((NFC . ucs-normalize-NFC-region)
(NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf
(erase-buffer)
(insert ,str)
(,(cdr (assq norm norm-alist)) (point-min) (point-max))
(goto-char (point-min))
(insert ,equal-to)
(eq (compare-buffer-substrings nil nil (point) nil (point) nil) 0))))
(defmacro ucs-normalize-tests--normalization-chareq-p (norm char char-eq-to)
"Like `ucs-normalize-string' but reuse current buffer for efficiency.
And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(let ((norm-alist '((NFC . ucs-normalize-NFC-region)
(NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf
(erase-buffer)
(insert ,char)
(,(cdr (assq norm norm-alist)) (point-min) (point-max))
(and (eq (buffer-size) 1)
(eq (char-after (point-min)) ,char-eq-to)))))
(defvar ucs-normalize-tests--chars-part1 nil)
(defsubst ucs-normalize-tests--rule1-holds-p (source nfc nfd nfkc nfkd)
"Check 1st conformance rule.
The following invariants must be true for all conformant implementations..."
(when ucs-normalize-tests--chars-part1
;; See `ucs-normalize-tests--rule2-holds-p'.
(aset ucs-normalize-tests--chars-part1
(aref source 0) 1))
(and
;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
(ucs-normalize-tests--normalization-equal-p NFC source nfc)
(ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
(ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
;; c4 == toNFC(c4) == toNFC(c5)
(ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
(ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
(ucs-normalize-tests--normalization-equal-p NFD source nfd)
(ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
(ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
;; c5 == toNFD(c4) == toNFD(c5)
(ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
(ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
(ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
(ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd)))
(defsubst ucs-normalize-tests--rule2-holds-p (X)
"Check 2nd conformance rule.
For every code point X assigned in this version of Unicode that
is not specifically listed in Part 1, the following invariants
must be true for all conformant implementations:
X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)"
(and (ucs-normalize-tests--normalization-chareq-p NFC X X)
(ucs-normalize-tests--normalization-chareq-p NFD X X)
(ucs-normalize-tests--normalization-chareq-p NFKC X X)
(ucs-normalize-tests--normalization-chareq-p NFKD X X)))
(cl-defun ucs-normalize-tests--rule1-failing-for-partX (part &optional skip-lines &key progress-str)
"Returns a list of failed line numbers."
(with-temp-buffer
(insert-file-contents ucs-normalize-test-data-file)
(let ((beg-line (progn (search-forward (format "@Part%d" part))
(forward-line)
(line-number-at-pos)))
(end-line (progn (or (search-forward (format "@Part%d" (1+ part)) nil t)
(goto-char (point-max)))
(line-number-at-pos))))
(goto-char (point-min))
(forward-line (1- beg-line))
(cl-loop with reporter = (if progress-str (make-progress-reporter
progress-str beg-line end-line
0 nil 0.5))
for line from beg-line to (1- end-line)
unless (or (eq (following-char) ?#)
(ucs-normalize-tests--rule1-holds-p
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column))
(memq line skip-lines))
collect line
do (forward-line)
if reporter do (progress-reporter-update reporter line)))))
(defun ucs-normalize-tests--rule1-failing-for-lines (lines)
"Returns a list of failed line numbers."
(with-temp-buffer
(insert-file-contents ucs-normalize-test-data-file)
(goto-char (point-min))
(cl-loop for prev-line = 1 then line
for line in lines
do (forward-line (- line prev-line))
unless (ucs-normalize-tests--rule1-holds-p
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column)
(ucs-normalize-tests--parse-column))
collect line)))
(ert-deftest ucs-normalize-part0 ()
(should-not (ucs-normalize-tests--rule1-failing-for-partX 0)))
(defconst ucs-normalize-tests--failing-lines-part1
(list 2412 2413 2414 15133 15134 15135 15136 15137
15138 15139 15140 15141 15142 15143 15144 15145
15146 15147 15148 15149 15150 15151 15152 15153
15154 15155 15156 15157 15158 15159 15160 15161
15162 15163 15164 15165 15166 15167 15168 15169
15170 15171 15172 15173 15174 15175 15176 15177
15178 15179 15180 15181 15182 15183 15184 15185
15186 15187 15188 15192 15193 15194 15195 15196
15197 15198 15199 15200 15201 16211 16212 16213
16214 16215 16216 16217 16218 16219 16220 16221
16222 16223 16224 16225 16226 16227 16228 16229
16230 16231 16232 16233 16234 16235 16236 16237
16238 16239 16240 16241 16242 16243 16244 16245
16246 16247 16248 16249 16250 16251 16252 16253
16254 16255 16256 16257 16258 16259 16260 16261
16262 16263 16264 16265 16266 16267 16268 16269
16270 16271 16272 16273 16274 16275 16276 16277
16278 16279 16280 16281 16282 16283 16284 16285
16286 16287 16288 16289 16290 16291 16292 16293
16294 16295 16296 16297 16298 16299 16300 16301
16302 16303 16304 16305 16306 16307 16308 16309
16310 16311 16312 16313 16314 16315 16316 16317
16318 16319 16320 16321 16322 16323 16324 16325
16326 16327 16328 16329 16330 16331 16332 16333
16334 16335 16336 16337 16338 16339 16340 16341
16342 16343 16344 16345 16346 16347 16348 16349
16350 16351 16488 16489 16490 16491 16492 16493
16494 16495 16496 16497))
;; Keep a record of failures, for consulting afterwards (the ert
;; backtrace only shows a truncated version of these lists).
(defvar ucs-normalize-tests--part1-rule1-failed-lines nil
"A list of line numbers.")
(defvar ucs-normalize-tests--part1-rule2-failed-chars nil
"A list of code points.")
(defvar ucs-normalize-tests--part2-rule1-failed-lines nil
"A list of line numbers.")
(defun ucs-normalize-tests--part1-rule2 (chars-part1)
(let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
0 (max-char)))
(failed-chars nil))
(map-char-table
(lambda (char-range listed-in-part)
(unless (eq listed-in-part 1)
(if (characterp char-range)
(progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
(push char-range failed-chars))
(progress-reporter-update reporter char-range))
(cl-loop for char from (car char-range) to (cdr char-range)
unless (ucs-normalize-tests--rule2-holds-p char)
do (push char failed-chars)
do (progress-reporter-update reporter char)))))
chars-part1)
(progress-reporter-done reporter)
failed-chars))
(ert-deftest ucs-normalize-part1 ()
:tags '(:expensive-test)
(skip-unless (not (getenv "EMACS_HYDRA_CI"))) ; SLOW ~ 1800s
;; This takes a long time, so make sure we're compiled.
(dolist (fun '(ucs-normalize-tests--part1-rule2
ucs-normalize-tests--rule1-failing-for-partX
ucs-normalize-tests--rule1-holds-p
ucs-normalize-tests--rule2-holds-p))
(or (byte-code-function-p (symbol-function fun))
(byte-compile fun)))
(let ((ucs-normalize-tests--chars-part1 (make-char-table 'ucs-normalize-tests t)))
(setq ucs-normalize-tests--part1-rule1-failed-lines
(ucs-normalize-tests--rule1-failing-for-partX
1 ucs-normalize-tests--failing-lines-part1
:progress-str "UCS Normalize Test Part1, rule 1"))
(setq ucs-normalize-tests--part1-rule2-failed-chars
(ucs-normalize-tests--part1-rule2
ucs-normalize-tests--chars-part1))
(should-not ucs-normalize-tests--part1-rule1-failed-lines)
(should-not ucs-normalize-tests--part1-rule2-failed-chars)))
(ert-deftest ucs-normalize-part1-failing ()
:expected-result :failed
(skip-unless ucs-normalize-tests--failing-lines-part1)
(should-not
(ucs-normalize-tests--rule1-failing-for-lines
ucs-normalize-tests--failing-lines-part1)))
(defconst ucs-normalize-tests--failing-lines-part2
(list 17087 17088 17089 17090 17091 17092 17093 17094
17098 17099 17100 17101 17102 17103 17104 17105
17106 17107 17108 17113 17114 17115 17116 17117
17118 17119 17120 17125 17126 17127 17128 17129
17130 17131 17132 17133 17134 17135 17136 17137
17138 17139 17140 17141 17142 17143 17144 17145
17146 17157 17158 17159 17160 17161 17162 17163
17164 17185 17186 17187 17188 17189 17190 17197
17198 17199 17200 17207 17208 17209 17210 17211
17212 17213 17214 17219 17220 17221 17222 17275
17276 17285 17286 17295 17296 17309 17310 17311
17312 17313 17314 17315 17316 17317 17318 17319
17320 17325 17326 17373 17374 17419 17420 17421
17422 17433 17434 17439 17440 17465 17466 17473
17474 17479 17480 17485 17486 17491 17492 17497
17498 17499 17500 17501 17502 17505 17506 17507
17508 17511 17512 17519 17520 17523 17524 17527
17528 17531 17532 17551 17552 17555 17556 17599
17600 17601 17602 17603 17604 17605 17607 17608
17609 17610 17611 17612 17613 17615 17617 17619
17621 17623 17625 17627 17629 17631 17632 17633
17634 17635 17636 17637 17638 17639 17640 17669
17670 17675 17676 17681 17682 17689 17690 17691
17692 17693 17694 17707 17708 17713 17714 17715
17716 17727 17728 17733 17734 17739 17740 17745
17746 17749 17750 17753 17754 17759 17760 17767
17768 17807 17808 17809 17810 17811 17812 17813
17814 17816 17843 17844 17845 17846 17851 17852
17861 17875 17876 17879 17880 17899 17900 17911
17912 17913 17914 17915 17916 17917 17918 17919
17920 17921 17922 17927 17928 17929 17930 17931
17932 17933 17935 17937 17938 17939 17940 17941
17943 17945 17947 17949 17951 17952 17953 17955
17957 17959 17961 17962 17967 17968 17987 17988
17993 17994 18003 18004 18005 18006 18007 18008
18009 18010 18011 18012 18017 18018 18019 18020
18021 18022 18023 18024 18041 18042 18053 18054
18069 18070 18079 18080 18163 18164 18165 18166
18171 18172 18175 18176 18211 18212 18219 18220
18221 18222 18223 18224 18225 18226 18301 18302
18389 18390 18391 18392 18393 18394 18397 18398
18407 18408 18439 18440 18441 18442 18443 18444
18445 18446 18447 18448 18449 18450 18451 18452
18457 18458 18459 18460 18471 18472 18479 18480
18485 18486 18499 18500 18501 18502 18509 18510
18513 18514 18515 18516 18517 18518 18519 18520
18521 18523 18524 18525 18527 18528 18531 18537
18538 18539 18541 18543 18545 18547 18549 18550
18551 18553 18554 18555 18557 18558 18559 18560
18561 18563 18564 18565 18566 18567 18569 18571
18573 18575 18577 18579 18581 18583 18585 18587
18589 18591 18593 18595 18596 18597 18599 18601
18602 18603 18605 18606 18607 18609 18611 18612
18613 18615 18617 18618 18619 18621 18623 18624
18625 18627 18629 18631 18633 18635 18636 18637
18639 18641 18643 18645 18647 18649 18651 18653
18655 18657 18659 18661 18663 18665 18667 18668
18669 18670 18671 18674 18676 18686 18688 18690
18692 18694 18695 18696 18697 18698 18699 18700
18701 18702 18703 18704 18705 18706 18707 18708
18709 18710 18721 18722 18723 18724 18739 18741
18743 18745 18747 18749 18751 18753 18755 18757
18759 18761 18763 18765 18767 18769 18771 18773
18775 18777 18779 18781 18783 18785 18787 18789
18791 18793 18795 18797 18799 18801 18803 18805
18807 18809 18811 18813 18815 18817 18819 18821
18823 18825 18827 18829 18831 18833 18835 18837
18839 18840 18841 18842 18843 18844 18845 18846
18847 18848 18849 18850 18851 18852 18853 18855
18857 18859 18861 18863 18865 18866))
(ert-deftest ucs-normalize-part2 ()
:tags '(:expensive-test)
(should-not
(setq ucs-normalize-tests--part2-rule1-failed-lines
(ucs-normalize-tests--rule1-failing-for-partX
2 ucs-normalize-tests--failing-lines-part2
:progress-str "UCS Normalize Test Part2"))))
(ert-deftest ucs-normalize-part2-failing ()
:expected-result :failed
(skip-unless ucs-normalize-tests--failing-lines-part2)
(should-not
(ucs-normalize-tests--rule1-failing-for-lines
ucs-normalize-tests--failing-lines-part2)))
(ert-deftest ucs-normalize-part3 ()
(should-not
(ucs-normalize-tests--rule1-failing-for-partX 3)))
(defun ucs-normalize-tests--insert-failing-lines (var newval)
(insert (format "`%s' should be updated to:\n
\(defconst %s
(list " var var))
(dolist (linos (seq-partition newval 8))
(insert (mapconcat #'number-to-string linos " ") "\n"))
(insert "))"))
(defun ucs-normalize-check-failing-lines ()
(interactive)
(let ((ucs-normalize-tests--failing-lines-part1 nil)
(ucs-normalize-tests--failing-lines-part2 nil))
(setq ucs-normalize-tests--part1-rule1-failed-lines nil)
(setq ucs-normalize-tests--part1-rule2-failed-chars nil)
(setq ucs-normalize-tests--part2-rule1-failed-lines nil)
(ert "\\`ucs-normalize"))
(with-current-buffer (get-buffer-create "*ucs normalize change bad lines*")
(erase-buffer)
(unless (equal ucs-normalize-tests--part1-rule1-failed-lines
ucs-normalize-tests--failing-lines-part1)
(ucs-normalize-tests--insert-failing-lines
'ucs-normalize-tests--failing-lines-part1
ucs-normalize-tests--part1-rule1-failed-lines))
(when ucs-normalize-tests--part1-rule2-failed-chars
(insert (format "Some characters failed rule 2!\n\n%S"
`(list ,@ucs-normalize-tests--part1-rule2-failed-chars))))
(unless (equal ucs-normalize-tests--part2-rule1-failed-lines
ucs-normalize-tests--failing-lines-part2)
(ucs-normalize-tests--insert-failing-lines
'ucs-normalize-tests--failing-lines-part2
ucs-normalize-tests--part2-rule1-failed-lines))
(if (> (buffer-size) 0)
(if noninteractive
(princ (buffer-string) standard-output)
(display-buffer (current-buffer)))
(message "No changes to failing lines needed"))))
(ert-deftest ucs-normalize-save-match-data ()
"Verify that match data isn't clobbered (bug#41445)"
(string-match (rx (+ digit)) "a47b")
(should (equal (match-data t) '(1 3)))
(should (equal
(decode-coding-string
(encode-coding-string "Käsesoßenrührlöffel" 'utf-8-hfs)
'utf-8-hfs)
"Käsesoßenrührlöffel"))
(should (equal (match-data t) '(1 3))))
;;; ucs-normalize-tests.el ends here
|