Merge branch 'ah/pull'
[git] / t / helper / test-xml-encode.c
1 #include "test-tool.h"
2
/*
 * Text written to <stdout> in place of any input that cannot be passed
 * through as-is (invalid UTF-8 or a disallowed control character).
 */
static const char utf8_replace_character[] = "�";
4
5 /*
6  * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
7  * in an XML file.
8  */
9 int cmd__xml_encode(int argc, const char **argv)
10 {
11         unsigned char buf[1024], tmp[4], *tmp2 = NULL;
12         ssize_t cur = 0, len = 1, remaining = 0;
13         unsigned char ch;
14
15         for (;;) {
16                 if (++cur == len) {
17                         len = xread(0, buf, sizeof(buf));
18                         if (!len)
19                                 return 0;
20                         if (len < 0)
21                                 die_errno("Could not read <stdin>");
22                         cur = 0;
23                 }
24                 ch = buf[cur];
25
26                 if (tmp2) {
27                         if ((ch & 0xc0) != 0x80) {
28                                 fputs(utf8_replace_character, stdout);
29                                 tmp2 = NULL;
30                                 cur--;
31                                 continue;
32                         }
33                         *tmp2 = ch;
34                         tmp2++;
35                         if (--remaining == 0) {
36                                 fwrite(tmp, tmp2 - tmp, 1, stdout);
37                                 tmp2 = NULL;
38                         }
39                         continue;
40                 }
41
42                 if (!(ch & 0x80)) {
43                         /* 0xxxxxxx */
44                         if (ch == '&')
45                                 fputs("&amp;", stdout);
46                         else if (ch == '\'')
47                                 fputs("&apos;", stdout);
48                         else if (ch == '"')
49                                 fputs("&quot;", stdout);
50                         else if (ch == '<')
51                                 fputs("&lt;", stdout);
52                         else if (ch == '>')
53                                 fputs("&gt;", stdout);
54                         else if (ch >= 0x20)
55                                 fputc(ch, stdout);
56                         else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
57                                 fprintf(stdout, "&#x%02x;", ch);
58                         else
59                                 fputs(utf8_replace_character, stdout);
60                 } else if ((ch & 0xe0) == 0xc0) {
61                         /* 110XXXXx 10xxxxxx */
62                         tmp[0] = ch;
63                         remaining = 1;
64                         tmp2 = tmp + 1;
65                 } else if ((ch & 0xf0) == 0xe0) {
66                         /* 1110XXXX 10Xxxxxx 10xxxxxx */
67                         tmp[0] = ch;
68                         remaining = 2;
69                         tmp2 = tmp + 1;
70                 } else if ((ch & 0xf8) == 0xf0) {
71                         /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
72                         tmp[0] = ch;
73                         remaining = 3;
74                         tmp2 = tmp + 1;
75                 } else
76                         fputs(utf8_replace_character, stdout);
77         }
78
79         return 0;
80 }