# Description string for this script (presumably read by the test harness,
# which is not visible in this listing).
3 test_description='working-tree-encoding conversion via gitattributes'
# Set and export the trace variable so every command run below inherits it.
7 GIT_TRACE_WORKING_TREE_ENCODING=1 && export GIT_TRACE_WORKING_TREE_ENCODING
# Setup: set core.eol to lf, declare *.utf16 files as UTF-16 in the working
# tree, write the same text as raw UTF-8 and raw UTF-16 fixtures, and stage
# the attributes file together with a copy of the UTF-16 fixture.
9 test_expect_success 'setup test repo' '
10 git config core.eol lf &&
# $text is used as the printf format so that its \n becomes a real newline.
12 text="hallo there!\ncan you read me?" &&
13 echo "*.utf16 text working-tree-encoding=utf-16" >.gitattributes &&
14 printf "$text" >test.utf8.raw &&
15 printf "$text" | iconv -f UTF-8 -t UTF-16 >test.utf16.raw &&
16 cp test.utf16.raw test.utf16 &&
18 git add .gitattributes test.utf16 &&
# The staged blob for test.utf16 must be byte-identical to the raw UTF-8
# fixture written during setup.
22 test_expect_success 'ensure UTF-8 is stored in Git' '
23 git cat-file -p :test.utf16 >test.utf16.git &&
24 test_cmp_bin test.utf8.raw test.utf16.git &&
# Remove the two files compared above.
27 rm test.utf8.raw test.utf16.git
# After git checkout, test.utf16 in the working tree must be byte-identical
# to the raw UTF-16 fixture.
30 test_expect_success 're-encode to UTF-16 on checkout' '
32 git checkout test.utf16 &&
33 test_cmp_bin test.utf16.raw test.utf16 &&
# A file that begins with a BOM must be rejected by git add when its
# working-tree-encoding attribute names an explicit byte order
# (UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE). Both BOM flavours are tried
# against each of the four attributes.
39 test_expect_success 'check prohibited UTF BOM' '
40 printf "\0a\0b\0c" >nobom.utf16be.raw &&
41 printf "a\0b\0c\0" >nobom.utf16le.raw &&
# The BOM bytes are FE FF (big-endian) and FF FE (little-endian), which is
# octal 376 and 377. An octal escape above 377 does not fit into one byte.
42 printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
43 printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
45 printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
46 printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
47 printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
48 printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&
50 echo "*.utf16be text working-tree-encoding=utf-16be" >>.gitattributes &&
51 echo "*.utf16le text working-tree-encoding=utf-16le" >>.gitattributes &&
52 echo "*.utf32be text working-tree-encoding=utf-32be" >>.gitattributes &&
53 echo "*.utf32le text working-tree-encoding=utf-32le" >>.gitattributes &&
55 # Here we add UTF-16 files with a BOM (big-endian and little-endian)
56 # but we tell Git to treat them as UTF-16BE/UTF-16LE. In these cases
57 # the BOM is prohibited.
58 cp bebom.utf16be.raw bebom.utf16be &&
59 test_must_fail git add bebom.utf16be 2>err.out &&
60 test_i18ngrep "fatal: BOM is prohibited .* UTF-16BE" err.out &&
62 cp lebom.utf16le.raw lebom.utf16be &&
63 test_must_fail git add lebom.utf16be 2>err.out &&
64 test_i18ngrep "fatal: BOM is prohibited .* UTF-16BE" err.out &&
66 cp bebom.utf16be.raw bebom.utf16le &&
67 test_must_fail git add bebom.utf16le 2>err.out &&
68 test_i18ngrep "fatal: BOM is prohibited .* UTF-16LE" err.out &&
70 cp lebom.utf16le.raw lebom.utf16le &&
71 test_must_fail git add lebom.utf16le 2>err.out &&
72 test_i18ngrep "fatal: BOM is prohibited .* UTF-16LE" err.out &&
74 # ... and the same for UTF-32
75 cp bebom.utf32be.raw bebom.utf32be &&
76 test_must_fail git add bebom.utf32be 2>err.out &&
77 test_i18ngrep "fatal: BOM is prohibited .* UTF-32BE" err.out &&
79 cp lebom.utf32le.raw lebom.utf32be &&
80 test_must_fail git add lebom.utf32be 2>err.out &&
81 test_i18ngrep "fatal: BOM is prohibited .* UTF-32BE" err.out &&
83 cp bebom.utf32be.raw bebom.utf32le &&
84 test_must_fail git add bebom.utf32le 2>err.out &&
85 test_i18ngrep "fatal: BOM is prohibited .* UTF-32LE" err.out &&
87 cp lebom.utf32le.raw lebom.utf32le &&
88 test_must_fail git add lebom.utf32le 2>err.out &&
89 test_i18ngrep "fatal: BOM is prohibited .* UTF-32LE" err.out &&
# A file without a BOM must be rejected by git add when its attribute names
# plain UTF-16 or UTF-32 (no byte order given). Uses the nobom.*.raw
# fixtures written by the previous test.
95 test_expect_success 'check required UTF BOM' '
96 echo "*.utf32 text working-tree-encoding=utf-32" >>.gitattributes &&
98 cp nobom.utf16be.raw nobom.utf16 &&
99 test_must_fail git add nobom.utf16 2>err.out &&
100 test_i18ngrep "fatal: BOM is required .* UTF-16" err.out &&
102 cp nobom.utf16le.raw nobom.utf16 &&
103 test_must_fail git add nobom.utf16 2>err.out &&
104 test_i18ngrep "fatal: BOM is required .* UTF-16" err.out &&
106 cp nobom.utf32be.raw nobom.utf32 &&
107 test_must_fail git add nobom.utf32 2>err.out &&
108 test_i18ngrep "fatal: BOM is required .* UTF-32" err.out &&
110 cp nobom.utf32le.raw nobom.utf32 &&
111 test_must_fail git add nobom.utf32 2>err.out &&
112 test_i18ngrep "fatal: BOM is required .* UTF-32" err.out &&
# Drop the rejected files and return the repository to HEAD.
115 rm nobom.utf16 nobom.utf32 &&
116 git reset --hard HEAD
# Checkout of a UTF-16 encoded file must follow core.eol in the working
# tree (CRLF or LF) while git ls-files --eol keeps reporting LF in the index.
119 test_expect_success 'eol conversion for UTF-16 encoded files on checkout' '
120 printf "one\ntwo\nthree\n" >lf.utf8.raw &&
121 printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&
123 cat lf.utf8.raw | iconv -f UTF-8 -t UTF-16 >lf.utf16.raw &&
124 cat crlf.utf8.raw | iconv -f UTF-8 -t UTF-16 >crlf.utf16.raw &&
125 cp crlf.utf16.raw eol.utf16 &&
127 cat >expectIndexLF <<-\EOF &&
128 i/lf w/-text attr/text eol.utf16
134 # UTF-16 with CRLF (Windows line endings)
136 git -c core.eol=crlf checkout eol.utf16 &&
137 test_cmp_bin crlf.utf16.raw eol.utf16 &&
139 # Although the file has CRLF in the working tree, ensure LF in the index
140 git ls-files --eol eol.utf16 >actual &&
141 test_cmp expectIndexLF actual &&
143 # UTF-16 with LF (Unix line endings)
145 git -c core.eol=lf checkout eol.utf16 &&
146 test_cmp_bin lf.utf16.raw eol.utf16 &&
148 # The file has LF in the working tree, ensure LF in the index as well
149 git ls-files --eol eol.utf16 >actual &&
150 test_cmp expectIndexLF actual&&
152 rm crlf.utf16.raw crlf.utf8.raw lf.utf16.raw lf.utf8.raw &&
155 git reset --hard HEAD^
# An unknown encoding name (garbage) must make git add fail with a
# "failed to encode" message on stderr. An empty encoding value is also
# configured for *.nothing files.
158 test_expect_success 'check unsupported encodings' '
160 echo "*.nothing text working-tree-encoding=" >>.gitattributes &&
161 printf "nothing" >t.nothing &&
164 echo "*.garbage text working-tree-encoding=garbage" >>.gitattributes &&
165 printf "garbage" >t.garbage &&
166 test_must_fail git add t.garbage 2>err.out &&
167 test_i18ngrep "fatal: failed to encode" err.out &&
171 git reset --hard HEAD
# Store a plain-text blob under a *.utf16 name while bypassing the filters,
# commit it by hand, then expect git checkout HEAD^ to fail with an
# "overwritten by checkout" error.
174 test_expect_success 'error if encoding round trip is not the same during refresh' '
# Remember the starting commit so the final reset can return to it.
175 BEFORE_STATE=$(git rev-parse HEAD) &&
177 # Skip the UTF-16 filter for the added file
178 # This simulates a Git version that has no working tree encoding support
179 echo "hallo" >nonsense.utf16 &&
180 TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
181 git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
# Build the commit from the index with plumbing commands and point master at it.
182 COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
183 git update-ref refs/heads/master $COMMIT &&
185 test_must_fail git checkout HEAD^ 2>err.out &&
186 test_i18ngrep "error: .* overwritten by checkout:" err.out &&
190 git reset --hard $BEFORE_STATE
# Store BOM-less UTF-16BE bytes under a *.utf16 name while bypassing the
# filters and commit them by hand. git diff must still exit successfully
# but report "BOM is required" on stderr.
193 test_expect_success 'error if encoding garbage is already in Git' '
# Remember the starting commit so the final reset can return to it.
194 BEFORE_STATE=$(git rev-parse HEAD) &&
196 # Skip the UTF-16 filter for the added file
197 # This simulates a Git version that has no working tree encoding support
198 cp nobom.utf16be.raw nonsense.utf16 &&
199 TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
200 git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&
201 COMMIT=$(git commit-tree -p $(git rev-parse HEAD) -m "plain commit" $(git write-tree)) &&
202 git update-ref refs/heads/master $COMMIT &&
204 git diff 2>err.out &&
205 test_i18ngrep "error: BOM is required" err.out &&
209 git reset --hard $BEFORE_STATE
# Use GIT_TRACE output to check which encodings get a round-trip check
# during git add, both by default and under core.checkRoundtripEncoding.
212 test_expect_success 'check roundtrip encoding' '
213 text="hallo there!\nroundtrip test here!" &&
214 printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
215 printf "$text" | iconv -f UTF-8 -t UTF-16 >roundtrip.utf16 &&
216 echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&
218 # SHIFT-JIS encoded files are round-trip checked by default...
# In the pipelines below, stderr is sent into the pipe for grep and stdout
# is discarded.
219 GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 >/dev/null |
220 grep "Checking roundtrip encoding for SHIFT-JIS" &&
223 # ... unless we overwrite the Git config!
224 test_config core.checkRoundtripEncoding "garbage" &&
225 ! GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 >/dev/null |
226 grep "Checking roundtrip encoding for SHIFT-JIS" &&
227 test_unconfig core.checkRoundtripEncoding &&
230 # UTF-16 encoded files should not be round-trip checked by default...
231 ! GIT_TRACE=1 git add roundtrip.utf16 2>&1 >/dev/null |
232 grep "Checking roundtrip encoding for UTF-16" &&
235 # ... unless we tell Git to check it!
236 test_config_global core.checkRoundtripEncoding "UTF-16, UTF-32" &&
237 GIT_TRACE=1 git add roundtrip.utf16 2>&1 >/dev/null |
238 grep "Checking roundtrip encoding for UTF-16" &&
241 # ... and it is still checked when the configured list is reordered
242 # (here we also check that the casing of the encoding is irrelevant)
243 test_config_global core.checkRoundtripEncoding "UTF-32, utf-16" &&
244 GIT_TRACE=1 git add roundtrip.utf16 2>&1 >/dev/null |
245 grep "Checking roundtrip encoding for UTF-16" &&
249 rm roundtrip.shift roundtrip.utf16 &&
250 git reset --hard HEAD