tests/turkish-I-without-dot


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

#!/bin/sh
# grep -i would misbehave for any matched line containing a character
# (like "I" in the tr_TR.utf8 locale) whose lower-case representation
# occupies more bytes (two in this case, for 0xc4b1, aka U+0131).

# Copyright (C) 2011-2012 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Load the shared test harness.  init.sh is looked up under $srcdir, which
# is set to "." here when it is not already set.
. "${srcdir=.}/init.sh"

# path_prepend_ comes from init.sh; presumably it puts ../src at the front
# of PATH so the freshly built grep is the one under test -- confirm there.
path_prepend_ ../src

# Prerequisite checks, also supplied by the harness.  Judging by their
# names they require a usable tr_TR.utf8 locale and a grep built with
# multibyte support.
require_tr_utf8_locale_
require_compiled_in_MB_support

# Regression check.  Prior to the fix, grep -i in this locale could emit a
# large and varying amount of uninitialized memory for this input:
# $ printf "IIIIIII\n" > in
# $ for i in $(seq 10); do LC_ALL=tr_TR.utf8 src/grep -i . in|wc -c; done
# 760
# 754
# 585
# 298
# 273
# 458
# 660
# 552
# 936
# 678

fail=0

# The input is a single line of seven ASCII capital I's.
if ! printf '%s\n' IIIIIII > in; then
  framework_failure_
fi

# grep must select the line ...
if ! LC_ALL=tr_TR.utf8 grep -i '....' in > out; then
  fail=1
fi

# ... and its output must be identical to the input.
if ! compare out in; then
  fail=1
fi

# Also exercise the case in which the original string and the lower-case
# buffer have precisely the same length (22 bytes here), yet internal
# offsets do differ.  Lengths are the same because while some bytes shrink
# when converted to lower case, others grow, and here they balance out.
#
# Each unit is an ASCII "I" followed by the two bytes 0xC4 0xB0 (the UTF-8
# encoding of U+0130).  The bytes are written as octal escapes because
# POSIX printf specifies only \ddd in its format operand; some /bin/sh
# implementations (e.g. dash) print "\xC4" literally, which would turn
# this into an all-ASCII input that no longer exercises the multibyte case.
i='I\304\260'

# $i is deliberately part of the format string so that printf expands the
# escapes it contains.
printf "$i$i$i$i$i$i$i\n" > in || framework_failure_
LC_ALL=tr_TR.utf8 grep -i .... in > out || fail=1
compare out in || fail=1

# Report the accumulated status via the harness-provided Exit.
Exit $fail