# Merge branch 'jk/complete-branch-force-delete'
# [git] / t / t0028-working-tree-encoding.sh
#!/bin/sh
#
# Tests for the "working-tree-encoding" gitattribute.  The tests below
# expect content to be stored as UTF-8 inside Git while the working tree
# copy uses the encoding named by the attribute.

test_description='working-tree-encoding conversion via gitattributes'

# Some tests below update refs/heads/main directly, so the initial branch
# name is pinned here (presumably consumed by test-lib.sh).
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

. ./test-lib.sh

# NOTE(review): presumably enables extra trace output for the encoding
# conversion; nothing in this file reads the variable back.
GIT_TRACE_WORKING_TREE_ENCODING=1
export GIT_TRACE_WORKING_TREE_ENCODING

# NO_UTF16_BOM holds when iconv encodes "abc" to UTF-16 in exactly 6
# bytes: three 2-byte code units, which leaves no room for a BOM.
# Leaving the command substitution unquoted lets word splitting drop any
# padding that "wc -c" may print around the number.
test_lazy_prereq NO_UTF16_BOM '
	test $(printf abc | iconv -f UTF-8 -t UTF-16 | wc -c) -eq 6
'

# NO_UTF32_BOM holds when iconv encodes "abc" to UTF-32 in exactly 12
# bytes: three 4-byte code units, which leaves no room for a BOM.
# Leaving the command substitution unquoted lets word splitting drop any
# padding that "wc -c" may print around the number.
test_lazy_prereq NO_UTF32_BOM '
	test $(printf abc | iconv -f UTF-8 -t UTF-32 | wc -c) -eq 12
'

# Re-encode UTF-8 read from stdin as UTF-16 on stdout.
#
# When the NO_UTF16_BOM prerequisite holds (iconv does not emit a BOM on
# its own), the bytes 0xFE 0xFF are written first.  The return status is
# that of iconv, or of printf if writing those bytes fails.
write_utf16 () {
	{
		! test_have_prereq NO_UTF16_BOM ||
		printf '\376\377'
	} &&
	iconv -f UTF-8 -t UTF-16
}

# Re-encode UTF-8 read from stdin as UTF-32 on stdout.
#
# When the NO_UTF32_BOM prerequisite holds (iconv does not emit a BOM on
# its own), the bytes 0x00 0x00 0xFE 0xFF are written first.  The return
# status is that of iconv, or of printf if writing those bytes fails.
write_utf32 () {
	{
		! test_have_prereq NO_UTF32_BOM ||
		printf '\0\0\376\377'
	} &&
	iconv -f UTF-8 -t UTF-32
}

# Create the fixtures used by the later tests and commit the UTF-16 ones.
# The *.raw files hold the exact bytes that later tests compare against
# with test_cmp_bin.
test_expect_success 'setup test files' '
	git config core.eol lf &&

	text="hallo there!\ncan you read me?" &&
	{
		echo "*.utf16 text working-tree-encoding=utf-16" &&
		echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM"
	} >.gitattributes &&
	printf "$text" >test.utf8.raw &&
	printf "$text" | write_utf16 >test.utf16.raw &&
	printf "$text" | write_utf32 >test.utf32.raw &&
	{
		printf "\377\376" &&
		printf "$text" | iconv -f UTF-8 -t UTF-16LE
	} >test.utf16lebom.raw &&

	# Fixtures for the line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# Fixtures for the BOM tests: "abc" with and without a BOM,
	# in both byte orders, for UTF-16 and UTF-32
	printf "\0a\0b\0c" >nobom.utf16be.raw &&
	printf "a\0b\0c\0" >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Only the UTF-16 files are added here; the UTF-32 file is
	# created now but added by a later test
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	cp test.utf16lebom.raw test.utf16lebom &&
	git add .gitattributes test.utf16 test.utf16lebom &&
	git commit -m initial
'

# The staged content of test.utf16 must be byte-identical to the UTF-8
# fixture, even though the working tree file is UTF-16.
test_expect_success 'ensure UTF-8 is stored in Git' '
	test_when_finished "rm -f test.utf16.git" &&

	git cat-file -p :test.utf16 >test.utf16.git &&
	test_cmp_bin test.utf8.raw test.utf16.git
'

# Deleting the working tree file and checking it out again must
# reproduce the UTF-16 fixture byte for byte.  The fixture itself is
# removed once this test is done.
test_expect_success 're-encode to UTF-16 on checkout' '
	test_when_finished "rm -f test.utf16.raw" &&

	rm test.utf16 &&
	git checkout test.utf16 &&
	test_cmp_bin test.utf16.raw test.utf16
'

# Same as the UTF-16 checkout test, but for the file whose attribute is
# working-tree-encoding=UTF-16LE-BOM.
test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
	rm test.utf16lebom &&
	git checkout test.utf16lebom &&
	test_cmp_bin test.utf16lebom.raw test.utf16lebom
'

# The attribute is set in .git/info/attributes instead of .gitattributes;
# the staged content of the UTF-32 file must still equal the UTF-8
# fixture.
test_expect_success 'check $GIT_DIR/info/attributes support' '
	test_when_finished "rm -f test.utf32.git" &&
	test_when_finished "git reset --hard HEAD" &&

	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	git cat-file -p :test.utf32 >test.utf32.git &&
	test_cmp_bin test.utf8.raw test.utf32.git
'

# Run the same three tests once for UTF-16 and once for UTF-32.  ${i}
# appears inside the single-quoted test bodies, so it is expanded when a
# body is evaluated, while the loop variable still holds the current
# value.
for i in 16 32
do
	# A BOM in a file declared as UTF-${i}BE or UTF-${i}LE must make
	# "git add" fail with a hint to use UTF-${i} instead.
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add UTF-16 (resp. UTF-32) files with a BOM
		# (big/little-endian) but we tell Git to treat them as
		# UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE).
		# In these cases the BOM is prohibited.
		cp bebom.utf${i}be.raw bebom.utf${i}be &&
		test_must_fail git add bebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}be &&
		test_must_fail git add lebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp bebom.utf${i}be.raw bebom.utf${i}le &&
		test_must_fail git add bebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}le &&
		test_must_fail git add lebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
	'

	# A file declared as plain UTF-${i} but lacking a BOM must make
	# "git add" fail with a hint to use the BE or LE variant.
	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		cp nobom.utf${i}be.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&

		cp nobom.utf${i}le.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
	'

	# Commit a CRLF file, then check it out with core.eol=crlf and
	# core.eol=lf; in both cases the index must report LF.
	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		test_when_finished "git reset --hard HEAD^" &&

		write_utf${i} <lf.utf8.raw >lf.utf${i}.raw &&
		write_utf${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		# Expected "git ls-files --eol" line.  It is built with
		# explicit field widths and an explicit \t so that the
		# padding and the tab separator do not depend on literal
		# whitespace surviving in this script.
		printf "i/%-5s w/%-5s attr/%-17s\t%s\n" \
			lf -text text "eol.utf${i}" >expectIndexLF &&

		git add eol.utf${i} &&
		git commit -m eol &&

		# UTF-${i} with CRLF (Windows line endings)
		rm eol.utf${i} &&
		git -c core.eol=crlf checkout eol.utf${i} &&
		test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

		# Although the file has CRLF in the working tree,
		# ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual &&

		# UTF-${i} with LF (Unix line endings)
		rm eol.utf${i} &&
		git -c core.eol=lf checkout eol.utf${i} &&
		test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

		# The file has LF in the working tree, ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual
	'
done

# Walk through four attribute forms: set without a value (rejected),
# explicitly unset (accepted), empty value (accepted) and an unknown
# encoding name (rejected).
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# Attribute set without a value
	echo "*.set text working-tree-encoding" >.gitattributes &&
	printf "set" >t.set &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	# Attribute explicitly unset
	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	printf "unset" >t.unset &&
	git add t.unset &&

	# Attribute with an empty value
	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	printf "empty" >t.empty &&
	git add t.empty &&

	# Attribute naming an encoding that does not exist
	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	printf "garbage" >t.garbage &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'

# A blob is written with --no-filters, so its stored bytes did not go
# through the working-tree-encoding conversion.  Checking out the parent
# commit afterwards must fail with an "overwritten by checkout" error.
test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a UTF-16 file but skip the "working-tree-encoding"
	# filter. Consequently, the in-repo representation is UTF-16 and not
	# UTF-8. This simulates a Git version that has no working tree encoding
	# support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&
	parent=$(git rev-parse HEAD) &&
	tree=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $parent -m "plain commit" $tree) &&
	git update-ref refs/heads/main $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'

# A BOM-less UTF-16BE blob is committed under a path whose attribute says
# UTF-16.  "git diff" itself must succeed, but it has to report the
# missing BOM on stderr.
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file
	# This simulates a Git version that has no working-tree-encoding
	# support
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
	parent=$(git rev-parse HEAD) &&
	tree=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $parent -m "plain commit" $tree) &&
	git update-ref refs/heads/main $COMMIT &&

	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'

# ICONV_SHIFT_JIS holds when the local iconv accepts a conversion from
# UTF-8 to SHIFT-JIS; it is probed with empty input.
test_lazy_prereq ICONV_SHIFT_JIS '
	iconv -f UTF-8 -t SHIFT-JIS </dev/null
'

# Check which encodings trigger the "Checking roundtrip encoding" trace
# message, with and without core.checkRoundtripEncoding.
#
# The trace goes to a file rather than through a pipe into grep, so the
# exit status of each "git add" is part of the && chain and a failing
# add can no longer go unnoticed.
test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16 roundtrip.trace" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | write_utf16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>roundtrip.trace &&
	grep "Checking roundtrip encoding for SHIFT-JIS" roundtrip.trace &&
	git reset &&

	# ... unless we overwrite the Git config!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>roundtrip.trace &&
	! grep "Checking roundtrip encoding for SHIFT-JIS" roundtrip.trace &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	GIT_TRACE=1 git add roundtrip.utf16 2>roundtrip.trace &&
	! grep "Checking roundtrip encoding for UTF-16" roundtrip.trace &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>roundtrip.trace &&
	grep "Checking roundtrip encoding for utf-16" roundtrip.trace &&
	git reset &&

	# Same check with the list reordered and in lower case
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>roundtrip.trace &&
	grep "Checking roundtrip encoding for utf-16" roundtrip.trace &&
	git reset
'

# Commit a string as UTF-8, check the file out again under a different
# working-tree-encoding, and compare the result with the expected bytes.
#
# $1: checkout encoding (value of the working-tree-encoding attribute)
# $2: test string; it is used as a printf format, so it must not
#     contain "%" and backslash escapes in it are interpreted
# $3: expected file content in the checkout encoding, written as printf
#     escapes (e.g. octal "\124")
#
# The arguments are copied into the global variables encoding,
# orig_string and expect_bytes because the single-quoted test body refers
# to them by name when it is run.
test_commit_utf8_checkout_other () {
	encoding="$1"
	orig_string="$2"
	expect_bytes="$3"

	test_expect_success "Commit UTF-8, checkout $encoding" '
		test_when_finished "git checkout HEAD -- .gitattributes" &&

		test_ext="commit_utf8_checkout_$encoding" &&
		test_file="test.$test_ext" &&

		# Commit as UTF-8
		echo "*.$test_ext text working-tree-encoding=UTF-8" >.gitattributes &&
		printf "$orig_string" >"$test_file" &&
		git add "$test_file" &&
		git commit -m "Test data" &&

		# Checkout in tested encoding
		rm "$test_file" &&
		echo "*.$test_ext text working-tree-encoding=$encoding" >.gitattributes &&
		git checkout HEAD -- "$test_file" &&

		# Compare with the expected bytes.  The expansion is quoted
		# so that it reaches printf as one unsplit format string.
		printf "$expect_bytes" >"$test_file.raw" &&
		test_cmp_bin "$test_file.raw" "$test_file"
	'
}

# One run per target encoding.  The third argument lists, as octal printf
# escapes, the bytes the checked-out file is expected to contain for the
# string "Test Тест".
test_commit_utf8_checkout_other "UTF-8" "Test Тест" \
	"\124\145\163\164\040\320\242\320\265\321\201\321\202"
test_commit_utf8_checkout_other "UTF-16LE" "Test Тест" \
	"\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE" "Test Тест" \
	"\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-16LE-BOM" "Test Тест" \
	"\377\376\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE-BOM" "Test Тест" \
	"\376\377\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-32LE" "Test Тест" \
	"\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\000\042\004\000\000\065\004\000\000\101\004\000\000\102\004\000\000"
test_commit_utf8_checkout_other "UTF-32BE" "Test Тест" \
	"\000\000\000\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\004\042\000\000\004\065\000\000\004\101\000\000\004\102"

test_done