Merge branch 'tt/bisect-in-c'
[git] / t / t0028-working-tree-encoding.sh
#!/bin/sh

test_description='working-tree-encoding conversion via gitattributes'

. ./test-lib.sh

# Set and export the trace switch so that it is present in the environment
# of every command started by the tests below.
GIT_TRACE_WORKING_TREE_ENCODING=1
export GIT_TRACE_WORKING_TREE_ENCODING
8
test_expect_success 'setup test files' '
	git config core.eol lf &&

	# Route *.utf16 and *.utf16lebom paths through working-tree-encoding.
	text="hallo there!\ncan you read me?" &&
	echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
	echo "*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" >>.gitattributes &&

	# Reference copies of the same text in each encoding.
	printf "$text" >test.utf8.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-16 >test.utf16.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-32 >test.utf32.raw &&

	# UTF-16LE-BOM reference: a little-endian BOM (0xFF 0xFE) followed by
	# the text as UTF-16LE. The payload must be UTF-16LE, not UTF-32LE,
	# to match the encoding named in .gitattributes.
	printf "\377\376"                         >test.utf16lebom.raw &&
	printf "$text" | iconv -f UTF-8 -t UTF-16LE >>test.utf16lebom.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests
	# A BOM is built from the bytes 0xFE (octal 376) and 0xFF (octal 377).
	# Octal 777 does not fit into a single byte and must not be used.
	printf "\0a\0b\0c"                         >nobom.utf16be.raw &&
	printf "a\0b\0c\0"                         >nobom.utf16le.raw &&
	printf "\376\377\0a\0b\0c"                 >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0"                 >lebom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c"             >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0"             >nobom.utf32le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Create all three working-tree files, but add only the UTF-16 and
	# UTF-16LE-BOM ones; the UTF-32 file is added by a later test.
	cp test.utf16.raw test.utf16 &&
	cp test.utf32.raw test.utf32 &&
	cp test.utf16lebom.raw test.utf16lebom &&
	git add .gitattributes test.utf16 test.utf16lebom &&
	git commit -m initial
'
42
test_expect_success 'ensure UTF-8 is stored in Git' '
	# Dump the staged content of test.utf16 and compare it byte for byte
	# with the UTF-8 reference file.
	stored=test.utf16.git &&
	test_when_finished "rm -f $stored" &&

	git cat-file -p :test.utf16 >"$stored" &&
	test_cmp_bin test.utf8.raw "$stored"
'
49
test_expect_success 're-encode to UTF-16 on checkout' '
	# The UTF-16 reference file is not used by any later test.
	test_when_finished "rm -f test.utf16.raw" &&

	# Remove the working-tree copy so that checkout has to write it
	# again, then compare the result with the reference bytes.
	path=test.utf16 &&
	rm "$path" &&
	git checkout "$path" &&
	test_cmp_bin test.utf16.raw "$path"
'
57
test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
	# Same check as for plain UTF-16: delete the working-tree file, let
	# checkout recreate it and compare it with the reference bytes.
	path=test.utf16lebom &&
	rm "$path" &&
	git checkout "$path" &&
	test_cmp_bin test.utf16lebom.raw "$path"
'
63
test_expect_success 'check $GIT_DIR/info/attributes support' '
	staged=test.utf32.git &&
	test_when_finished "rm -f $staged" &&
	test_when_finished "git reset --hard HEAD" &&

	# Declare the encoding in the repository-local attributes file
	# instead of the tracked .gitattributes.
	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&
	git add test.utf32 &&

	# The staged content must equal the UTF-8 reference file.
	git cat-file -p :test.utf32 >"$staged" &&
	test_cmp_bin test.utf8.raw "$staged"
'
74
# Run the BOM and end-of-line checks once for UTF-16 and once for UTF-32.
# ${i} is expanded when each test body is evaluated, i.e. while the loop
# variable still holds the width for the current iteration.
for i in 16 32
do
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add UTF-16 (resp. UTF-32) files with a BOM
		# (big/little-endian) but we tell Git to treat them as
		# UTF-16BE/UTF-16LE (resp. UTF-32BE/UTF-32LE).
		# In these cases the BOM is prohibited.
		cp bebom.utf${i}be.raw bebom.utf${i}be &&
		test_must_fail git add bebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}be &&
		test_must_fail git add lebom.utf${i}be 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}be" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp bebom.utf${i}be.raw bebom.utf${i}le &&
		test_must_fail git add bebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out &&

		cp lebom.utf${i}le.raw lebom.utf${i}le &&
		test_must_fail git add lebom.utf${i}le 2>err.out &&
		test_i18ngrep "fatal: BOM is prohibited .* utf-${i}LE" err.out &&
		test_i18ngrep "use UTF-${i} as working-tree-encoding" err.out
	'

	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		# Files without a BOM must be rejected when the attribute
		# names the endianness-neutral encoding.
		cp nobom.utf${i}be.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out &&

		cp nobom.utf${i}le.raw nobom.utf${i} &&
		test_must_fail git add nobom.utf${i} 2>err.out &&
		test_i18ngrep "fatal: BOM is required .* utf-${i}" err.out &&
		test_i18ngrep "use UTF-${i}BE or UTF-${i}LE" err.out
	'

	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		# Undo the "eol" commit created below.
		test_when_finished "git reset --hard HEAD^" &&

		iconv -f UTF-8 -t UTF-${i} <lf.utf8.raw >lf.utf${i}.raw &&
		iconv -f UTF-8 -t UTF-${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		# Expected "git ls-files --eol" line. The path is separated
		# from the padded columns by a TAB, which is written as an
		# explicit \t here so that it cannot be lost to whitespace
		# conversion of this script.
		# NOTE(review): the column widths (5, 5, 17) are assumed to
		# mirror the output format of ls-files; confirm.
		printf "i/%-5s w/%-5s attr/%-17s\teol.utf${i}\n" \
			lf -text text >expectIndexLF &&

		git add eol.utf${i} &&
		git commit -m eol &&

		# UTF-${i} with CRLF (Windows line endings)
		rm eol.utf${i} &&
		git -c core.eol=crlf checkout eol.utf${i} &&
		test_cmp_bin crlf.utf${i}.raw eol.utf${i} &&

		# Although the file has CRLF in the working tree,
		# ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual &&

		# UTF-${i} with LF (Unix line endings)
		rm eol.utf${i} &&
		git -c core.eol=lf checkout eol.utf${i} &&
		test_cmp_bin lf.utf${i}.raw eol.utf${i} &&

		# The file has LF in the working tree, ensure LF in the index
		git ls-files --eol eol.utf${i} >actual &&
		test_cmp expectIndexLF actual
	'
done
158
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# Attribute set without a value: adding the file must fail.
	printf "set" >t.set &&
	echo "*.set text working-tree-encoding" >.gitattributes &&
	test_must_fail git add t.set 2>err.out &&
	test_i18ngrep "true/false are no valid working-tree-encodings" err.out &&

	# Attribute explicitly unset: adding the file succeeds.
	printf "unset" >t.unset &&
	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	git add t.unset &&

	# Attribute set to the empty string: adding the file succeeds.
	printf "empty" >t.empty &&
	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	git add t.empty &&

	# Unknown encoding name: adding the file must fail.
	printf "garbage" >t.garbage &&
	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	test_must_fail git add t.garbage 2>err.out &&
	test_i18ngrep "failed to encode" err.out
'
180
test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a file that matches the utf-16le attribute but skip
	# the "working-tree-encoding" filter. Consequently, the in-repo
	# representation is the raw file content and not the result of a
	# conversion to UTF-8. This simulates a Git version that has no
	# working tree encoding support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&

	# Each plumbing command runs as its own step of the && chain so that
	# a failure is not hidden inside a nested command substitution.
	# HEAD has not moved since BEFORE_STATE was recorded, so it is the
	# parent of the new commit.
	TEST_TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TEST_TREE) &&
	git update-ref refs/heads/master $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_i18ngrep "error: .* overwritten by checkout:" err.out
'
199
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file.
	# This simulates a Git version that has no working-tree-encoding
	# support: the BOM-less UTF-16BE bytes are stored verbatim.
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&

	# Each plumbing command runs as its own step of the && chain so that
	# a failure is not hidden inside a nested command substitution.
	# HEAD has not moved since BEFORE_STATE was recorded, so it is the
	# parent of the new commit.
	TEST_TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TEST_TREE) &&
	git update-ref refs/heads/master $COMMIT &&

	# The diff itself must succeed; the problem is only reported on stderr.
	git diff 2>err.out &&
	test_i18ngrep "error: BOM is required" err.out
'
215
test_lazy_prereq ICONV_SHIFT_JIS '
	# Probe the local iconv: convert empty input from UTF-8 to SHIFT-JIS
	# and use the exit status as the result.
	iconv -f UTF-8 -t SHIFT-JIS </dev/null
'
219
test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | iconv -f UTF-8 -t UTF-16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# ... unless we overwrite the Git config!
	! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	# The pattern uses the same lower-case spelling as the positive
	# checks below; with a different spelling this negative check
	# could never fail.
	! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# Same again with a different order and casing in the config value
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset
'
258
259 test_done