/*
 * Replacement text that cmd__xml_encode() writes to stdout with fputs()
 * in place of input it does not pass through, e.g. when a byte fails the
 * (ch & 0xc0) == 0x80 continuation-byte check.
 * NOTE(review): the literal's exact bytes are emitted verbatim; confirm
 * whether it is meant to be the raw U+FFFD character or a numeric
 * character reference for it.
 */
3 static const char *utf8_replace_character = "�";
6 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
9 int cmd__xml_encode(int argc, const char **argv)
11 unsigned char buf[1024], tmp[4], *tmp2 = NULL;
12 ssize_t cur = 0, len = 1, remaining = 0;
17 len = xread(0, buf, sizeof(buf));
21 die_errno("Could not read <stdin>");
27 if ((ch & 0xc0) != 0x80) {
28 fputs(utf8_replace_character, stdout);
35 if (--remaining == 0) {
36 fwrite(tmp, tmp2 - tmp, 1, stdout);
45 fputs("&", stdout);
47 fputs("'", stdout);
49 fputs(""", stdout);
51 fputs("<", stdout);
53 fputs(">", stdout);
56 else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
57 fprintf(stdout, "&#x%02x;", ch);
59 fputs(utf8_replace_character, stdout);
60 } else if ((ch & 0xe0) == 0xc0) {
61 /* 110XXXXx 10xxxxxx */
65 } else if ((ch & 0xf0) == 0xe0) {
66 /* 1110XXXX 10Xxxxxx 10xxxxxx */
70 } else if ((ch & 0xf8) == 0xf0) {
71 /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
76 fputs(utf8_replace_character, stdout);