t9351: derive anonymized tree checks from original repo
[git] / mailinfo.c
1 #include "cache.h"
2 #include "config.h"
3 #include "utf8.h"
4 #include "strbuf.h"
5 #include "mailinfo.h"
6
7 static void cleanup_space(struct strbuf *sb)
8 {
9         size_t pos, cnt;
10         for (pos = 0; pos < sb->len; pos++) {
11                 if (isspace(sb->buf[pos])) {
12                         sb->buf[pos] = ' ';
13                         for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
14                         strbuf_remove(sb, pos + 1, cnt);
15                 }
16         }
17 }
18
19 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
20 {
21         struct strbuf *src = name;
22         if (name->len < 3 || 60 < name->len || strpbrk(name->buf, "@<>"))
23                 src = email;
24         else if (name == out)
25                 return;
26         strbuf_reset(out);
27         strbuf_addbuf(out, src);
28 }
29
30 static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
31 {
32         /* John Doe <johndoe> */
33
34         char *bra, *ket;
35         /* This is fallback, so do not bother if we already have an
36          * e-mail address.
37          */
38         if (mi->email.len)
39                 return;
40
41         bra = strchr(line->buf, '<');
42         if (!bra)
43                 return;
44         ket = strchr(bra, '>');
45         if (!ket)
46                 return;
47
48         strbuf_reset(&mi->email);
49         strbuf_add(&mi->email, bra + 1, ket - bra - 1);
50
51         strbuf_reset(&mi->name);
52         strbuf_add(&mi->name, line->buf, bra - line->buf);
53         strbuf_trim(&mi->name);
54         get_sane_name(&mi->name, &mi->name, &mi->email);
55 }
56
/*
 * Copy a parenthesized comment into "outbuf" while undoing backslash
 * quoting.
 *
 * "in" points just past the opening '('.  The opening '(' is emitted
 * first, nested comments are copied together with their parentheses,
 * and a backslash is dropped so that the character after it is copied
 * literally.
 *
 * Returns a pointer just past the ')' that closes the outermost
 * comment, or a pointer to the terminating NUL when the comment is not
 * closed.  The returned pointer never goes past the NUL, so a caller
 * can safely keep scanning from it.  Nesting is tracked with a counter
 * rather than recursion, so deeply nested input cannot exhaust the
 * stack.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
        int take_next_literally = 0;
        size_t depth = 1;

        strbuf_addch(outbuf, '(');

        while (*in) {
                int c = *in++;

                if (take_next_literally) {
                        take_next_literally = 0;
                } else {
                        switch (c) {
                        case '\\':
                                take_next_literally = 1;
                                continue;
                        case '(':
                                strbuf_addch(outbuf, '(');
                                depth++;
                                continue;
                        case ')':
                                strbuf_addch(outbuf, ')');
                                if (!--depth)
                                        return in;
                                continue;
                        }
                }

                strbuf_addch(outbuf, c);
        }

        /* unterminated comment: "in" points at the NUL */
        return in;
}
86
/*
 * Copy the contents of a double-quoted string into "outbuf" while
 * undoing backslash quoting.
 *
 * "in" points just past the opening '"'.  The surrounding quotes are
 * not copied; a backslash is dropped so that the character after it is
 * copied literally.
 *
 * Returns a pointer just past the closing '"', or a pointer to the
 * terminating NUL when the string is not closed.  The returned pointer
 * never goes past the NUL, so a caller can safely keep scanning from
 * it.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
        int take_next_literally = 0;

        while (*in) {
                int c = *in++;

                if (take_next_literally) {
                        take_next_literally = 0;
                } else {
                        switch (c) {
                        case '\\':
                                take_next_literally = 1;
                                continue;
                        case '"':
                                return in;
                        }
                }

                strbuf_addch(outbuf, c);
        }

        /* unterminated quoted string: "in" points at the NUL */
        return in;
}
110
111 static void unquote_quoted_pair(struct strbuf *line)
112 {
113         struct strbuf outbuf;
114         const char *in = line->buf;
115         int c;
116
117         strbuf_init(&outbuf, line->len);
118
119         while ((c = *in++) != 0) {
120                 switch (c) {
121                 case '"':
122                         in = unquote_quoted_string(&outbuf, in);
123                         continue;
124                 case '(':
125                         in = unquote_comment(&outbuf, in);
126                         continue;
127                 }
128
129                 strbuf_addch(&outbuf, c);
130         }
131
132         strbuf_swap(&outbuf, line);
133         strbuf_release(&outbuf);
134
135 }
136
137 static void handle_from(struct mailinfo *mi, const struct strbuf *from)
138 {
139         char *at;
140         size_t el;
141         struct strbuf f;
142
143         strbuf_init(&f, from->len);
144         strbuf_addbuf(&f, from);
145
146         unquote_quoted_pair(&f);
147
148         at = strchr(f.buf, '@');
149         if (!at) {
150                 parse_bogus_from(mi, from);
151                 goto out;
152         }
153
154         /*
155          * If we already have one email, don't take any confusing lines
156          */
157         if (mi->email.len && strchr(at + 1, '@'))
158                 goto out;
159
160         /* Pick up the string around '@', possibly delimited with <>
161          * pair; that is the email part.
162          */
163         while (at > f.buf) {
164                 char c = at[-1];
165                 if (isspace(c))
166                         break;
167                 if (c == '<') {
168                         at[-1] = ' ';
169                         break;
170                 }
171                 at--;
172         }
173         el = strcspn(at, " \n\t\r\v\f>");
174         strbuf_reset(&mi->email);
175         strbuf_add(&mi->email, at, el);
176         strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
177
178         /* The remainder is name.  It could be
179          *
180          * - "John Doe <john.doe@xz>"                   (a), or
181          * - "john.doe@xz (John Doe)"                   (b), or
182          * - "John (zzz) Doe <john.doe@xz> (Comment)"   (c)
183          *
184          * but we have removed the email part, so
185          *
186          * - remove extra spaces which could stay after email (case 'c'), and
187          * - trim from both ends, possibly removing the () pair at the end
188          *   (cases 'a' and 'b').
189          */
190         cleanup_space(&f);
191         strbuf_trim(&f);
192         if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
193                 strbuf_remove(&f, 0, 1);
194                 strbuf_setlen(&f, f.len - 1);
195         }
196
197         get_sane_name(&mi->name, &f, &mi->email);
198 out:
199         strbuf_release(&f);
200 }
201
202 static void handle_header(struct strbuf **out, const struct strbuf *line)
203 {
204         if (!*out) {
205                 *out = xmalloc(sizeof(struct strbuf));
206                 strbuf_init(*out, line->len);
207         } else
208                 strbuf_reset(*out);
209
210         strbuf_addbuf(*out, line);
211 }
212
213 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
214  * to have enough heuristics to grok MIME encoded patches often found
215  * on our mailing lists.  For example, we do not even treat header lines
216  * case insensitively.
217  */
218
/*
 * Look for "name" (matched case-insensitively, anywhere in "line") and
 * copy the value that follows it into "attr".  A value that starts with
 * '"' runs up to the next '"'; otherwise it runs up to ';', space or
 * tab.  "attr" is emptied first in either case.
 *
 * Returns 1 when "name" was found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
        const char *value = strcasestr(line, name);
        const char *terminators = "; \t";

        strbuf_setlen(attr, 0);
        if (!value)
                return 0;

        value += strlen(name);
        if (*value == '"') {
                value++;
                terminators = "\"";
        }
        strbuf_add(attr, value, strcspn(value, terminators));
        return 1;
}
238
239 static int has_attr_value(const char *line, const char *name, const char *value)
240 {
241         struct strbuf sb = STRBUF_INIT;
242         int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
243         strbuf_release(&sb);
244         return rc;
245 }
246
247 static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
248 {
249         struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
250         strbuf_init(boundary, line->len);
251
252         mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
253         mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
254
255         if (slurp_attr(line->buf, "boundary=", boundary)) {
256                 strbuf_insertstr(boundary, 0, "--");
257                 if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
258                         error("Too many boundaries to handle");
259                         mi->input_error = -1;
260                         mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
261                         return;
262                 }
263                 *(mi->content_top) = boundary;
264                 boundary = NULL;
265         }
266         slurp_attr(line->buf, "charset=", &mi->charset);
267
268         if (boundary) {
269                 strbuf_release(boundary);
270                 free(boundary);
271         }
272 }
273
274 static void handle_content_transfer_encoding(struct mailinfo *mi,
275                                              const struct strbuf *line)
276 {
277         if (strcasestr(line->buf, "base64"))
278                 mi->transfer_encoding = TE_BASE64;
279         else if (strcasestr(line->buf, "quoted-printable"))
280                 mi->transfer_encoding = TE_QP;
281         else
282                 mi->transfer_encoding = TE_DONTCARE;
283 }
284
285 static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
286 {
287         struct strbuf *content_top = *(mi->content_top);
288
289         return ((content_top->len <= line->len) &&
290                 !memcmp(line->buf, content_top->buf, content_top->len));
291 }
292
293 static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
294 {
295         size_t at = 0;
296
297         while (at < subject->len) {
298                 char *pos;
299                 size_t remove;
300
301                 switch (subject->buf[at]) {
302                 case 'r': case 'R':
303                         if (subject->len <= at + 3)
304                                 break;
305                         if ((subject->buf[at + 1] == 'e' ||
306                              subject->buf[at + 1] == 'E') &&
307                             subject->buf[at + 2] == ':') {
308                                 strbuf_remove(subject, at, 3);
309                                 continue;
310                         }
311                         at++;
312                         break;
313                 case ' ': case '\t': case ':':
314                         strbuf_remove(subject, at, 1);
315                         continue;
316                 case '[':
317                         pos = strchr(subject->buf + at, ']');
318                         if (!pos)
319                                 break;
320                         remove = pos - subject->buf + at + 1;
321                         if (!mi->keep_non_patch_brackets_in_subject ||
322                             (7 <= remove &&
323                              memmem(subject->buf + at, remove, "PATCH", 5)))
324                                 strbuf_remove(subject, at, remove);
325                         else {
326                                 at += remove;
327                                 /*
328                                  * If the input had a space after the ], keep
329                                  * it.  We don't bother with finding the end of
330                                  * the space, since we later normalize it
331                                  * anyway.
332                                  */
333                                 if (isspace(subject->buf[at]))
334                                         at += 1;
335                         }
336                         continue;
337                 }
338                 break;
339         }
340         strbuf_trim(subject);
341 }
342
#define MAX_HDR_PARSED 10
/*
 * Names of the headers collected into the per-header data arrays.  The
 * unused tail of the array is NULL, which is what loops over this table
 * use as their end marker.
 */
static const char *header[MAX_HDR_PARSED] = {
        "From",
        "Subject",
        "Date",
};
347
348 static inline int skip_header(const struct strbuf *line, const char *hdr,
349                               const char **outval)
350 {
351         const char *val;
352         if (!skip_iprefix(line->buf, hdr, &val) ||
353             *val++ != ':')
354                 return 0;
355         while (isspace(*val))
356                 val++;
357         *outval = val;
358         return 1;
359 }
360
/*
 * Returns non-zero when "line" ("len" bytes long) has the same shape as
 * the SAMPLE line below: the same length, "From ", 40 lowercase hex
 * digits, and then exactly the same tail as SAMPLE.
 */
static int is_format_patch_separator(const char *line, int len)
{
        static const char SAMPLE[] =
                "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
        const size_t sample_len = sizeof(SAMPLE) - 1;
        const char *cp;
        size_t tail_at;

        if (len != sample_len)
                return 0;
        if (!skip_prefix(line, "From ", &cp))
                return 0;
        if (strspn(cp, "0123456789abcdef") != 40)
                return 0;

        /* everything after the object name must match SAMPLE verbatim */
        cp += 40;
        tail_at = cp - line;
        return !memcmp(SAMPLE + tail_at, cp, sample_len - tail_at);
}
376
377 static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
378 {
379         const char *in = q_seg->buf;
380         int c;
381         struct strbuf *out = xmalloc(sizeof(struct strbuf));
382         strbuf_init(out, q_seg->len);
383
384         while ((c = *in++) != 0) {
385                 if (c == '=') {
386                         int ch, d = *in;
387                         if (d == '\n' || !d)
388                                 break; /* drop trailing newline */
389                         ch = hex2chr(in);
390                         if (ch >= 0) {
391                                 strbuf_addch(out, ch);
392                                 in += 2;
393                                 continue;
394                         }
395                         /* garbage -- fall through */
396                 }
397                 if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
398                         c = 0x20;
399                 strbuf_addch(out, c);
400         }
401         return out;
402 }
403
404 static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
405 {
406         /* Decode in..ep, possibly in-place to ot */
407         int c, pos = 0, acc = 0;
408         const char *in = b_seg->buf;
409         struct strbuf *out = xmalloc(sizeof(struct strbuf));
410         strbuf_init(out, b_seg->len);
411
412         while ((c = *in++) != 0) {
413                 if (c == '+')
414                         c = 62;
415                 else if (c == '/')
416                         c = 63;
417                 else if ('A' <= c && c <= 'Z')
418                         c -= 'A';
419                 else if ('a' <= c && c <= 'z')
420                         c -= 'a' - 26;
421                 else if ('0' <= c && c <= '9')
422                         c -= '0' - 52;
423                 else
424                         continue; /* garbage */
425                 switch (pos++) {
426                 case 0:
427                         acc = (c << 2);
428                         break;
429                 case 1:
430                         strbuf_addch(out, (acc | (c >> 4)));
431                         acc = (c & 15) << 4;
432                         break;
433                 case 2:
434                         strbuf_addch(out, (acc | (c >> 2)));
435                         acc = (c & 3) << 6;
436                         break;
437                 case 3:
438                         strbuf_addch(out, (acc | c));
439                         acc = pos = 0;
440                         break;
441                 }
442         }
443         return out;
444 }
445
446 static int convert_to_utf8(struct mailinfo *mi,
447                            struct strbuf *line, const char *charset)
448 {
449         char *out;
450         size_t out_len;
451
452         if (!mi->metainfo_charset || !charset || !*charset)
453                 return 0;
454
455         if (same_encoding(mi->metainfo_charset, charset))
456                 return 0;
457         out = reencode_string_len(line->buf, line->len,
458                                   mi->metainfo_charset, charset, &out_len);
459         if (!out) {
460                 mi->input_error = -1;
461                 return error("cannot convert from %s to %s",
462                              charset, mi->metainfo_charset);
463         }
464         strbuf_attach(line, out, out_len, out_len);
465         return 0;
466 }
467
468 static void decode_header(struct mailinfo *mi, struct strbuf *it)
469 {
470         char *in, *ep, *cp;
471         struct strbuf outbuf = STRBUF_INIT, *dec;
472         struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
473         int found_error = 1; /* pessimism */
474
475         in = it->buf;
476         while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
477                 int encoding;
478                 strbuf_reset(&charset_q);
479                 strbuf_reset(&piecebuf);
480
481                 if (in != ep) {
482                         /*
483                          * We are about to process an encoded-word
484                          * that begins at ep, but there is something
485                          * before the encoded word.
486                          */
487                         char *scan;
488                         for (scan = in; scan < ep; scan++)
489                                 if (!isspace(*scan))
490                                         break;
491
492                         if (scan != ep || in == it->buf) {
493                                 /*
494                                  * We should not lose that "something",
495                                  * unless we have just processed an
496                                  * encoded-word, and there is only LWS
497                                  * before the one we are about to process.
498                                  */
499                                 strbuf_add(&outbuf, in, ep - in);
500                         }
501                 }
502                 /* E.g.
503                  * ep : "=?iso-2022-jp?B?GyR...?= foo"
504                  * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
505                  */
506                 ep += 2;
507
508                 if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
509                         goto release_return;
510
511                 if (cp + 3 - it->buf > it->len)
512                         goto release_return;
513                 strbuf_add(&charset_q, ep, cp - ep);
514
515                 encoding = cp[1];
516                 if (!encoding || cp[2] != '?')
517                         goto release_return;
518                 ep = strstr(cp + 3, "?=");
519                 if (!ep)
520                         goto release_return;
521                 strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
522                 switch (tolower(encoding)) {
523                 default:
524                         goto release_return;
525                 case 'b':
526                         dec = decode_b_segment(&piecebuf);
527                         break;
528                 case 'q':
529                         dec = decode_q_segment(&piecebuf, 1);
530                         break;
531                 }
532                 if (convert_to_utf8(mi, dec, charset_q.buf))
533                         goto release_return;
534
535                 strbuf_addbuf(&outbuf, dec);
536                 strbuf_release(dec);
537                 free(dec);
538                 in = ep + 2;
539         }
540         strbuf_addstr(&outbuf, in);
541         strbuf_reset(it);
542         strbuf_addbuf(it, &outbuf);
543         found_error = 0;
544 release_return:
545         strbuf_release(&outbuf);
546         strbuf_release(&charset_q);
547         strbuf_release(&piecebuf);
548
549         if (found_error)
550                 mi->input_error = -1;
551 }
552
/*
 * If "line" is a "hdr" header, append its value to "val", run
 * decode_header() on "val" (which unwraps RFC2047 B and Q encoding and
 * may normalize the charset), and return 1.  Otherwise return 0 and
 * leave "val" untouched.
 */
static int parse_header(const struct strbuf *line,
                        const char *hdr,
                        struct mailinfo *mi,
                        struct strbuf *val)
{
        const char *raw_value;

        if (skip_header(line, hdr, &raw_value)) {
                strbuf_addstr(val, raw_value);
                decode_header(mi, val);
                return 1;
        }
        return 0;
}
571
572 static int check_header(struct mailinfo *mi,
573                         const struct strbuf *line,
574                         struct strbuf *hdr_data[], int overwrite)
575 {
576         int i, ret = 0;
577         struct strbuf sb = STRBUF_INIT;
578
579         /* search for the interesting parts */
580         for (i = 0; header[i]; i++) {
581                 if ((!hdr_data[i] || overwrite) &&
582                     parse_header(line, header[i], mi, &sb)) {
583                         handle_header(&hdr_data[i], &sb);
584                         ret = 1;
585                         goto check_header_out;
586                 }
587         }
588
589         /* Content stuff */
590         if (parse_header(line, "Content-Type", mi, &sb)) {
591                 handle_content_type(mi, &sb);
592                 ret = 1;
593                 goto check_header_out;
594         }
595         if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
596                 handle_content_transfer_encoding(mi, &sb);
597                 ret = 1;
598                 goto check_header_out;
599         }
600         if (parse_header(line, "Message-Id", mi, &sb)) {
601                 if (mi->add_message_id)
602                         mi->message_id = strbuf_detach(&sb, NULL);
603                 ret = 1;
604                 goto check_header_out;
605         }
606
607 check_header_out:
608         strbuf_release(&sb);
609         return ret;
610 }
611
612 /*
613  * Returns 1 if the given line or any line beginning with the given line is an
614  * in-body header (that is, check_header will succeed when passed
615  * mi->s_hdr_data).
616  */
617 static int is_inbody_header(const struct mailinfo *mi,
618                             const struct strbuf *line)
619 {
620         int i;
621         const char *val;
622         for (i = 0; header[i]; i++)
623                 if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
624                         return 1;
625         return 0;
626 }
627
628 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
629 {
630         struct strbuf *ret;
631
632         switch (mi->transfer_encoding) {
633         case TE_QP:
634                 ret = decode_q_segment(line, 0);
635                 break;
636         case TE_BASE64:
637                 ret = decode_b_segment(line);
638                 break;
639         case TE_DONTCARE:
640         default:
641                 return;
642         }
643         strbuf_reset(line);
644         strbuf_addbuf(line, ret);
645         strbuf_release(ret);
646         free(ret);
647 }
648
649 static inline int patchbreak(const struct strbuf *line)
650 {
651         size_t i;
652
653         /* Beginning of a "diff -" header? */
654         if (starts_with(line->buf, "diff -"))
655                 return 1;
656
657         /* CVS "Index: " line? */
658         if (starts_with(line->buf, "Index: "))
659                 return 1;
660
661         /*
662          * "--- <filename>" starts patches without headers
663          * "---<sp>*" is a manual separator
664          */
665         if (line->len < 4)
666                 return 0;
667
668         if (starts_with(line->buf, "---")) {
669                 /* space followed by a filename? */
670                 if (line->buf[3] == ' ' && !isspace(line->buf[4]))
671                         return 1;
672                 /* Just whitespace? */
673                 for (i = 3; i < line->len; i++) {
674                         unsigned char c = line->buf[i];
675                         if (c == '\n')
676                                 return 1;
677                         if (!isspace(c))
678                                 break;
679                 }
680                 return 0;
681         }
682         return 0;
683 }
684
/*
 * Returns 1 when "line" looks like a scissors line such as
 * "-- >8 --", and 0 otherwise.
 *
 * A perforation is made of dashes and scissors marks (">8", "8<", ">%"
 * or "%<"), plus any whitespace that follows one of them.
 */
static int is_scissors_line(const char *line)
{
        const char *p = line;
        const char *first = NULL, *last = NULL;
        int marks = 0;          /* bytes that belong to scissors marks */
        int blanks = 0;         /* whitespace counted inside perforation */
        int perf_len = 0;       /* total bytes of perforation */
        int inside = 0;         /* previous visible token was perforation */
        int width;

        while (*p) {
                int is_mark;

                if (isspace(*p)) {
                        if (inside) {
                                perf_len++;
                                blanks++;
                        }
                        p++;
                        continue;
                }

                /* note: for a two-byte mark "last" is its first byte */
                last = p;
                if (!first)
                        first = p;

                if (*p == '-') {
                        inside = 1;
                        perf_len++;
                        p++;
                        continue;
                }

                is_mark = (p[0] == '>' && (p[1] == '8' || p[1] == '%')) ||
                          ((p[0] == '8' || p[0] == '%') && p[1] == '<');
                if (is_mark) {
                        inside = 1;
                        perf_len += 2;
                        marks += 2;
                        p += 2;
                        continue;
                }

                inside = 0;
                p++;
        }

        /*
         * The mark must be at least 8 bytes long (e.g. "-- >8 --").
         * Even though there can be arbitrary cruft on the same line
         * (e.g. "cut here"), in order to avoid misidentification, the
         * perforation must occupy more than a third of the visible
         * width of the line, and dashes and scissors must occupy more
         * than half of the perforation.
         */
        width = (first && last) ? last - first + 1 : 0;

        return (marks && 8 <= width &&
                width < perf_len * 3 &&
                blanks * 2 < perf_len);
}
736
737 static void flush_inbody_header_accum(struct mailinfo *mi)
738 {
739         if (!mi->inbody_header_accum.len)
740                 return;
741         if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
742                 BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
743         strbuf_reset(&mi->inbody_header_accum);
744 }
745
746 static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
747 {
748         if (mi->inbody_header_accum.len &&
749             (line->buf[0] == ' ' || line->buf[0] == '\t')) {
750                 if (mi->use_scissors && is_scissors_line(line->buf)) {
751                         /*
752                          * This is a scissors line; do not consider this line
753                          * as a header continuation line.
754                          */
755                         flush_inbody_header_accum(mi);
756                         return 0;
757                 }
758                 strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
759                 strbuf_addbuf(&mi->inbody_header_accum, line);
760                 return 1;
761         }
762
763         flush_inbody_header_accum(mi);
764
765         if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
766                 return is_format_patch_separator(line->buf + 1, line->len - 1);
767         if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
768                 int i;
769                 for (i = 0; header[i]; i++)
770                         if (!strcmp("Subject", header[i])) {
771                                 handle_header(&mi->s_hdr_data[i], line);
772                                 return 1;
773                         }
774                 return 0;
775         }
776         if (is_inbody_header(mi, line)) {
777                 strbuf_addbuf(&mi->inbody_header_accum, line);
778                 return 1;
779         }
780         return 0;
781 }
782
783 static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
784 {
785         assert(!mi->filter_stage);
786
787         if (mi->header_stage) {
788                 if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
789                         if (mi->inbody_header_accum.len) {
790                                 flush_inbody_header_accum(mi);
791                                 mi->header_stage = 0;
792                         }
793                         return 0;
794                 }
795         }
796
797         if (mi->use_inbody_headers && mi->header_stage) {
798                 mi->header_stage = check_inbody_header(mi, line);
799                 if (mi->header_stage)
800                         return 0;
801         } else
802                 /* Only trim the first (blank) line of the commit message
803                  * when ignoring in-body headers.
804                  */
805                 mi->header_stage = 0;
806
807         /* normalize the log message to UTF-8. */
808         if (convert_to_utf8(mi, line, mi->charset.buf))
809                 return 0; /* mi->input_error already set */
810
811         if (mi->use_scissors && is_scissors_line(line->buf)) {
812                 int i;
813
814                 strbuf_setlen(&mi->log_message, 0);
815                 mi->header_stage = 1;
816
817                 /*
818                  * We may have already read "secondary headers"; purge
819                  * them to give ourselves a clean restart.
820                  */
821                 for (i = 0; header[i]; i++) {
822                         if (mi->s_hdr_data[i])
823                                 strbuf_release(mi->s_hdr_data[i]);
824                         mi->s_hdr_data[i] = NULL;
825                 }
826                 return 0;
827         }
828
829         if (patchbreak(line)) {
830                 if (mi->message_id)
831                         strbuf_addf(&mi->log_message,
832                                     "Message-Id: %s\n", mi->message_id);
833                 return 1;
834         }
835
836         strbuf_addbuf(&mi->log_message, line);
837         return 0;
838 }
839
840 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
841 {
842         fwrite(line->buf, 1, line->len, mi->patchfile);
843         mi->patch_lines++;
844 }
845
846 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
847 {
848         switch (mi->filter_stage) {
849         case 0:
850                 if (!handle_commit_msg(mi, line))
851                         break;
852                 mi->filter_stage++;
853                 /* fallthrough */
854         case 1:
855                 handle_patch(mi, line);
856                 break;
857         }
858 }
859
860 static int is_rfc2822_header(const struct strbuf *line)
861 {
862         /*
863          * The section that defines the loosest possible
864          * field name is "3.6.8 Optional fields".
865          *
866          * optional-field = field-name ":" unstructured CRLF
867          * field-name = 1*ftext
868          * ftext = %d33-57 / %59-126
869          */
870         int ch;
871         char *cp = line->buf;
872
873         /* Count mbox From headers as headers */
874         if (starts_with(cp, "From ") || starts_with(cp, ">From "))
875                 return 1;
876
877         while ((ch = *cp++)) {
878                 if (ch == ':')
879                         return 1;
880                 if ((33 <= ch && ch <= 57) ||
881                     (59 <= ch && ch <= 126))
882                         continue;
883                 break;
884         }
885         return 0;
886 }
887
888 static int read_one_header_line(struct strbuf *line, FILE *in)
889 {
890         struct strbuf continuation = STRBUF_INIT;
891
892         /* Get the first part of the line. */
893         if (strbuf_getline_lf(line, in))
894                 return 0;
895
896         /*
897          * Is it an empty line or not a valid rfc2822 header?
898          * If so, stop here, and return false ("not a header")
899          */
900         strbuf_rtrim(line);
901         if (!line->len || !is_rfc2822_header(line)) {
902                 /* Re-add the newline */
903                 strbuf_addch(line, '\n');
904                 return 0;
905         }
906
907         /*
908          * Now we need to eat all the continuation lines..
909          * Yuck, 2822 header "folding"
910          */
911         for (;;) {
912                 int peek;
913
914                 peek = fgetc(in);
915                 if (peek == EOF)
916                         break;
917                 ungetc(peek, in);
918                 if (peek != ' ' && peek != '\t')
919                         break;
920                 if (strbuf_getline_lf(&continuation, in))
921                         break;
922                 continuation.buf[0] = ' ';
923                 strbuf_rtrim(&continuation);
924                 strbuf_addbuf(line, &continuation);
925         }
926         strbuf_release(&continuation);
927
928         return 1;
929 }
930
931 static int find_boundary(struct mailinfo *mi, struct strbuf *line)
932 {
933         while (!strbuf_getline_lf(line, mi->input)) {
934                 if (*(mi->content_top) && is_multipart_boundary(mi, line))
935                         return 1;
936         }
937         return 0;
938 }
939
940 static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
941 {
942         struct strbuf newline = STRBUF_INIT;
943
944         strbuf_addch(&newline, '\n');
945 again:
946         if (line->len >= (*(mi->content_top))->len + 2 &&
947             !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
948                 /* we hit an end boundary */
949                 /* pop the current boundary off the stack */
950                 strbuf_release(*(mi->content_top));
951                 FREE_AND_NULL(*(mi->content_top));
952
953                 /* technically won't happen as is_multipart_boundary()
954                    will fail first.  But just in case..
955                  */
956                 if (--mi->content_top < mi->content) {
957                         error("Detected mismatched boundaries, can't recover");
958                         mi->input_error = -1;
959                         mi->content_top = mi->content;
960                         strbuf_release(&newline);
961                         return 0;
962                 }
963                 handle_filter(mi, &newline);
964                 strbuf_release(&newline);
965                 if (mi->input_error)
966                         return 0;
967
968                 /* skip to the next boundary */
969                 if (!find_boundary(mi, line))
970                         return 0;
971                 goto again;
972         }
973
974         /* set some defaults */
975         mi->transfer_encoding = TE_DONTCARE;
976         strbuf_reset(&mi->charset);
977
978         /* slurp in this section's info */
979         while (read_one_header_line(line, mi->input))
980                 check_header(mi, line, mi->p_hdr_data, 0);
981
982         strbuf_release(&newline);
983         /* replenish line */
984         if (strbuf_getline_lf(line, mi->input))
985                 return 0;
986         strbuf_addch(line, '\n');
987         return 1;
988 }
989
990 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
991                                  struct strbuf *prev)
992 {
993         size_t len = line->len;
994         const char *rest;
995
996         if (!mi->format_flowed) {
997                 handle_filter(mi, line);
998                 return;
999         }
1000
1001         if (line->buf[len - 1] == '\n') {
1002                 len--;
1003                 if (len && line->buf[len - 1] == '\r')
1004                         len--;
1005         }
1006
1007         /* Keep signature separator as-is. */
1008         if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1009                 if (prev->len) {
1010                         handle_filter(mi, prev);
1011                         strbuf_reset(prev);
1012                 }
1013                 handle_filter(mi, line);
1014                 return;
1015         }
1016
1017         /* Unstuff space-stuffed line. */
1018         if (len && line->buf[0] == ' ') {
1019                 strbuf_remove(line, 0, 1);
1020                 len--;
1021         }
1022
1023         /* Save flowed line for later, but without the soft line break. */
1024         if (len && line->buf[len - 1] == ' ') {
1025                 strbuf_add(prev, line->buf, len - !!mi->delsp);
1026                 return;
1027         }
1028
1029         /* Prepend any previous partial lines */
1030         strbuf_insert(line, 0, prev->buf, prev->len);
1031         strbuf_reset(prev);
1032
1033         handle_filter(mi, line);
1034 }
1035
1036 static void handle_body(struct mailinfo *mi, struct strbuf *line)
1037 {
1038         struct strbuf prev = STRBUF_INIT;
1039
1040         /* Skip up to the first boundary */
1041         if (*(mi->content_top)) {
1042                 if (!find_boundary(mi, line))
1043                         goto handle_body_out;
1044         }
1045
1046         do {
1047                 /* process any boundary lines */
1048                 if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1049                         /* flush any leftover */
1050                         if (prev.len) {
1051                                 handle_filter(mi, &prev);
1052                                 strbuf_reset(&prev);
1053                         }
1054                         if (!handle_boundary(mi, line))
1055                                 goto handle_body_out;
1056                 }
1057
1058                 /* Unwrap transfer encoding */
1059                 decode_transfer_encoding(mi, line);
1060
1061                 switch (mi->transfer_encoding) {
1062                 case TE_BASE64:
1063                 case TE_QP:
1064                 {
1065                         struct strbuf **lines, **it, *sb;
1066
1067                         /* Prepend any previous partial lines */
1068                         strbuf_insert(line, 0, prev.buf, prev.len);
1069                         strbuf_reset(&prev);
1070
1071                         /*
1072                          * This is a decoded line that may contain
1073                          * multiple new lines.  Pass only one chunk
1074                          * at a time to handle_filter()
1075                          */
1076                         lines = strbuf_split(line, '\n');
1077                         for (it = lines; (sb = *it); it++) {
1078                                 if (*(it + 1) == NULL) /* The last line */
1079                                         if (sb->buf[sb->len - 1] != '\n') {
1080                                                 /* Partial line, save it for later. */
1081                                                 strbuf_addbuf(&prev, sb);
1082                                                 break;
1083                                         }
1084                                 handle_filter_flowed(mi, sb, &prev);
1085                         }
1086                         /*
1087                          * The partial chunk is saved in "prev" and will be
1088                          * appended by the next iteration of read_line_with_nul().
1089                          */
1090                         strbuf_list_free(lines);
1091                         break;
1092                 }
1093                 default:
1094                         handle_filter_flowed(mi, line, &prev);
1095                 }
1096
1097                 if (mi->input_error)
1098                         break;
1099         } while (!strbuf_getwholeline(line, mi->input, '\n'));
1100
1101         if (prev.len)
1102                 handle_filter(mi, &prev);
1103
1104         flush_inbody_header_accum(mi);
1105
1106 handle_body_out:
1107         strbuf_release(&prev);
1108 }
1109
1110 static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1111 {
1112         const char *sp = data->buf;
1113         while (1) {
1114                 char *ep = strchr(sp, '\n');
1115                 int len;
1116                 if (!ep)
1117                         len = strlen(sp);
1118                 else
1119                         len = ep - sp;
1120                 fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1121                 if (!ep)
1122                         break;
1123                 sp = ep + 1;
1124         }
1125 }
1126
1127 static void handle_info(struct mailinfo *mi)
1128 {
1129         struct strbuf *hdr;
1130         int i;
1131
1132         for (i = 0; header[i]; i++) {
1133                 /* only print inbody headers if we output a patch file */
1134                 if (mi->patch_lines && mi->s_hdr_data[i])
1135                         hdr = mi->s_hdr_data[i];
1136                 else if (mi->p_hdr_data[i])
1137                         hdr = mi->p_hdr_data[i];
1138                 else
1139                         continue;
1140
1141                 if (memchr(hdr->buf, '\0', hdr->len)) {
1142                         error("a NUL byte in '%s' is not allowed.", header[i]);
1143                         mi->input_error = -1;
1144                 }
1145
1146                 if (!strcmp(header[i], "Subject")) {
1147                         if (!mi->keep_subject) {
1148                                 cleanup_subject(mi, hdr);
1149                                 cleanup_space(hdr);
1150                         }
1151                         output_header_lines(mi->output, "Subject", hdr);
1152                 } else if (!strcmp(header[i], "From")) {
1153                         cleanup_space(hdr);
1154                         handle_from(mi, hdr);
1155                         fprintf(mi->output, "Author: %s\n", mi->name.buf);
1156                         fprintf(mi->output, "Email: %s\n", mi->email.buf);
1157                 } else {
1158                         cleanup_space(hdr);
1159                         fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1160                 }
1161         }
1162         fprintf(mi->output, "\n");
1163 }
1164
1165 int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1166 {
1167         FILE *cmitmsg;
1168         int peek;
1169         struct strbuf line = STRBUF_INIT;
1170
1171         cmitmsg = fopen(msg, "w");
1172         if (!cmitmsg) {
1173                 perror(msg);
1174                 return -1;
1175         }
1176         mi->patchfile = fopen(patch, "w");
1177         if (!mi->patchfile) {
1178                 perror(patch);
1179                 fclose(cmitmsg);
1180                 return -1;
1181         }
1182
1183         mi->p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->p_hdr_data)));
1184         mi->s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*(mi->s_hdr_data)));
1185
1186         do {
1187                 peek = fgetc(mi->input);
1188                 if (peek == EOF) {
1189                         fclose(cmitmsg);
1190                         return error("empty patch: '%s'", patch);
1191                 }
1192         } while (isspace(peek));
1193         ungetc(peek, mi->input);
1194
1195         /* process the email header */
1196         while (read_one_header_line(&line, mi->input))
1197                 check_header(mi, &line, mi->p_hdr_data, 1);
1198
1199         handle_body(mi, &line);
1200         fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1201         fclose(cmitmsg);
1202         fclose(mi->patchfile);
1203
1204         handle_info(mi);
1205         strbuf_release(&line);
1206         return mi->input_error;
1207 }
1208
1209 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
1210 {
1211         struct mailinfo *mi = mi_;
1212
1213         if (!starts_with(var, "mailinfo."))
1214                 return git_default_config(var, value, NULL);
1215         if (!strcmp(var, "mailinfo.scissors")) {
1216                 mi->use_scissors = git_config_bool(var, value);
1217                 return 0;
1218         }
1219         /* perhaps others here */
1220         return 0;
1221 }
1222
1223 void setup_mailinfo(struct mailinfo *mi)
1224 {
1225         memset(mi, 0, sizeof(*mi));
1226         strbuf_init(&mi->name, 0);
1227         strbuf_init(&mi->email, 0);
1228         strbuf_init(&mi->charset, 0);
1229         strbuf_init(&mi->log_message, 0);
1230         strbuf_init(&mi->inbody_header_accum, 0);
1231         mi->header_stage = 1;
1232         mi->use_inbody_headers = 1;
1233         mi->content_top = mi->content;
1234         git_config(git_mailinfo_config, mi);
1235 }
1236
1237 void clear_mailinfo(struct mailinfo *mi)
1238 {
1239         int i;
1240
1241         strbuf_release(&mi->name);
1242         strbuf_release(&mi->email);
1243         strbuf_release(&mi->charset);
1244         strbuf_release(&mi->inbody_header_accum);
1245         free(mi->message_id);
1246
1247         if (mi->p_hdr_data)
1248                 for (i = 0; mi->p_hdr_data[i]; i++)
1249                         strbuf_release(mi->p_hdr_data[i]);
1250         free(mi->p_hdr_data);
1251         if (mi->s_hdr_data)
1252                 for (i = 0; mi->s_hdr_data[i]; i++)
1253                         strbuf_release(mi->s_hdr_data[i]);
1254         free(mi->s_hdr_data);
1255
1256         while (mi->content < mi->content_top) {
1257                 free(*(mi->content_top));
1258                 mi->content_top--;
1259         }
1260
1261         strbuf_release(&mi->log_message);
1262 }