mailinfo: move use_scissors and use_inbody_headers to struct mailinfo
[git] / builtin / mailinfo.c
1 /*
2  * Another stupid program, this one parsing the headers of an
3  * email to figure out authorship and subject
4  */
5 #include "cache.h"
6 #include "builtin.h"
7 #include "utf8.h"
8 #include "strbuf.h"
9
/*
 * Output streams: handle_commit_msg() writes the log message to cmitmsg
 * (and closes it, setting it to NULL, at the patch break);
 * handle_patch() writes the patch text to patchfile.
 */
static FILE *cmitmsg, *patchfile;

/*
 * Encoding that header values and the log message are converted to by
 * convert_to_utf8(); callers skip the conversion when this is NULL.
 */
static const char *metainfo_charset;
13
/*
 * Per-run parser state: the streams in use, the options that steer the
 * parse, and the information collected so far.
 */
struct mailinfo {
        FILE *input;    /* stream the message is read from */
        FILE *output;   /* NOTE(review): not referenced in this part of the file */

        struct strbuf name;     /* author name, set by handle_from() */
        struct strbuf email;    /* author e-mail, set by handle_from() */
        int keep_subject;       /* NOTE(review): not referenced in this part of the file */
        int keep_non_patch_brackets_in_subject; /* only strip [tags] containing "PATCH" */
        int add_message_id;     /* record the Message-Id header value */
        int use_scissors;       /* restart the log message at a scissors line */
        int use_inbody_headers; /* look for header lines at the top of the body */

        char *message_id;       /* copy of the Message-Id value, if recorded */
        int patch_lines;        /* number of lines handed to handle_patch() */
        int filter_stage; /* still reading log or are we copying patch? */
        int header_stage; /* still checking in-body headers? */
};
31
/*
 * Content-Transfer-Encoding of the part being read, as set by
 * handle_content_transfer_encoding(); TE_DONTCARE means the body lines
 * are passed through undecoded.
 */
static enum  {
        TE_DONTCARE, TE_QP, TE_BASE64
} transfer_encoding;

/* "charset=" attribute of the most recent Content-Type header. */
static struct strbuf charset = STRBUF_INIT;
/*
 * Header values indexed like header[]: p_hdr_data is filled from MIME
 * part headers (see handle_boundary()), s_hdr_data from in-body headers
 * (see handle_commit_msg()).
 */
static struct strbuf **p_hdr_data, **s_hdr_data;

/* Size of the multipart boundary stack. */
#define MAX_BOUNDARIES 5
40
41 static void cleanup_space(struct strbuf *sb)
42 {
43         size_t pos, cnt;
44         for (pos = 0; pos < sb->len; pos++) {
45                 if (isspace(sb->buf[pos])) {
46                         sb->buf[pos] = ' ';
47                         for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
48                         strbuf_remove(sb, pos + 1, cnt);
49                 }
50         }
51 }
52
53 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
54 {
55         struct strbuf *src = name;
56         if (name->len < 3 || 60 < name->len || strchr(name->buf, '@') ||
57                 strchr(name->buf, '<') || strchr(name->buf, '>'))
58                 src = email;
59         else if (name == out)
60                 return;
61         strbuf_reset(out);
62         strbuf_addbuf(out, src);
63 }
64
65 static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
66 {
67         /* John Doe <johndoe> */
68
69         char *bra, *ket;
70         /* This is fallback, so do not bother if we already have an
71          * e-mail address.
72          */
73         if (mi->email.len)
74                 return;
75
76         bra = strchr(line->buf, '<');
77         if (!bra)
78                 return;
79         ket = strchr(bra, '>');
80         if (!ket)
81                 return;
82
83         strbuf_reset(&mi->email);
84         strbuf_add(&mi->email, bra + 1, ket - bra - 1);
85
86         strbuf_reset(&mi->name);
87         strbuf_add(&mi->name, line->buf, bra - line->buf);
88         strbuf_trim(&mi->name);
89         get_sane_name(&mi->name, &mi->name, &mi->email);
90 }
91
92 static void handle_from(struct mailinfo *mi, const struct strbuf *from)
93 {
94         char *at;
95         size_t el;
96         struct strbuf f;
97
98         strbuf_init(&f, from->len);
99         strbuf_addbuf(&f, from);
100
101         at = strchr(f.buf, '@');
102         if (!at) {
103                 parse_bogus_from(mi, from);
104                 return;
105         }
106
107         /*
108          * If we already have one email, don't take any confusing lines
109          */
110         if (mi->email.len && strchr(at + 1, '@')) {
111                 strbuf_release(&f);
112                 return;
113         }
114
115         /* Pick up the string around '@', possibly delimited with <>
116          * pair; that is the email part.
117          */
118         while (at > f.buf) {
119                 char c = at[-1];
120                 if (isspace(c))
121                         break;
122                 if (c == '<') {
123                         at[-1] = ' ';
124                         break;
125                 }
126                 at--;
127         }
128         el = strcspn(at, " \n\t\r\v\f>");
129         strbuf_reset(&mi->email);
130         strbuf_add(&mi->email, at, el);
131         strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
132
133         /* The remainder is name.  It could be
134          *
135          * - "John Doe <john.doe@xz>"                   (a), or
136          * - "john.doe@xz (John Doe)"                   (b), or
137          * - "John (zzz) Doe <john.doe@xz> (Comment)"   (c)
138          *
139          * but we have removed the email part, so
140          *
141          * - remove extra spaces which could stay after email (case 'c'), and
142          * - trim from both ends, possibly removing the () pair at the end
143          *   (cases 'a' and 'b').
144          */
145         cleanup_space(&f);
146         strbuf_trim(&f);
147         if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
148                 strbuf_remove(&f, 0, 1);
149                 strbuf_setlen(&f, f.len - 1);
150         }
151
152         get_sane_name(&mi->name, &f, &mi->email);
153         strbuf_release(&f);
154 }
155
156 static void handle_header(struct strbuf **out, const struct strbuf *line)
157 {
158         if (!*out) {
159                 *out = xmalloc(sizeof(struct strbuf));
160                 strbuf_init(*out, line->len);
161         } else
162                 strbuf_reset(*out);
163
164         strbuf_addbuf(*out, line);
165 }
166
167 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
168  * to have enough heuristics to grok MIME encoded patches often found
169  * on our mailing lists.  For example, we do not even treat header lines
170  * case insensitively.
171  */
172
/*
 * Look for the attribute prefix name (e.g. "charset=") in line, ignoring
 * case, and copy its value into attr.  A value that starts with '"' runs
 * to the next '"'; an unquoted value runs to the next ';', space or tab.
 * attr is emptied first.  Returns 1 if the attribute was found, else 0.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
        const char *value = strcasestr(line, name);
        const char *terminators = "; \t";

        strbuf_setlen(attr, 0);
        if (!value)
                return 0;

        value += strlen(name);
        if (*value == '"') {
                value++;
                terminators = "\"";
        }
        strbuf_add(attr, value, strcspn(value, terminators));
        return 1;
}
192
/*
 * Stack of multipart boundary strings currently in effect; content_top
 * points at the innermost one.  handle_content_type() advances
 * content_top before storing, so slot 0 is never filled and a NULL
 * *content_top means no boundary is active.
 */
static struct strbuf *content[MAX_BOUNDARIES];

static struct strbuf **content_top = content;
196
197 static void handle_content_type(struct strbuf *line)
198 {
199         struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
200         strbuf_init(boundary, line->len);
201
202         if (slurp_attr(line->buf, "boundary=", boundary)) {
203                 strbuf_insert(boundary, 0, "--", 2);
204                 if (++content_top >= &content[MAX_BOUNDARIES]) {
205                         fprintf(stderr, "Too many boundaries to handle\n");
206                         exit(1);
207                 }
208                 *content_top = boundary;
209                 boundary = NULL;
210         }
211         slurp_attr(line->buf, "charset=", &charset);
212
213         if (boundary) {
214                 strbuf_release(boundary);
215                 free(boundary);
216         }
217 }
218
219 static void handle_message_id(struct mailinfo *mi, const struct strbuf *line)
220 {
221         if (mi->add_message_id)
222                 mi->message_id = strdup(line->buf);
223 }
224
225 static void handle_content_transfer_encoding(const struct strbuf *line)
226 {
227         if (strcasestr(line->buf, "base64"))
228                 transfer_encoding = TE_BASE64;
229         else if (strcasestr(line->buf, "quoted-printable"))
230                 transfer_encoding = TE_QP;
231         else
232                 transfer_encoding = TE_DONTCARE;
233 }
234
235 static int is_multipart_boundary(const struct strbuf *line)
236 {
237         return (((*content_top)->len <= line->len) &&
238                 !memcmp(line->buf, (*content_top)->buf, (*content_top)->len));
239 }
240
241 static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
242 {
243         size_t at = 0;
244
245         while (at < subject->len) {
246                 char *pos;
247                 size_t remove;
248
249                 switch (subject->buf[at]) {
250                 case 'r': case 'R':
251                         if (subject->len <= at + 3)
252                                 break;
253                         if ((subject->buf[at + 1] == 'e' ||
254                              subject->buf[at + 1] == 'E') &&
255                             subject->buf[at + 2] == ':') {
256                                 strbuf_remove(subject, at, 3);
257                                 continue;
258                         }
259                         at++;
260                         break;
261                 case ' ': case '\t': case ':':
262                         strbuf_remove(subject, at, 1);
263                         continue;
264                 case '[':
265                         pos = strchr(subject->buf + at, ']');
266                         if (!pos)
267                                 break;
268                         remove = pos - subject->buf + at + 1;
269                         if (!mi->keep_non_patch_brackets_in_subject ||
270                             (7 <= remove &&
271                              memmem(subject->buf + at, remove, "PATCH", 5)))
272                                 strbuf_remove(subject, at, remove);
273                         else {
274                                 at += remove;
275                                 /*
276                                  * If the input had a space after the ], keep
277                                  * it.  We don't bother with finding the end of
278                                  * the space, since we later normalize it
279                                  * anyway.
280                                  */
281                                 if (isspace(subject->buf[at]))
282                                         at += 1;
283                         }
284                         continue;
285                 }
286                 break;
287         }
288         strbuf_trim(subject);
289 }
290
#define MAX_HDR_PARSED 10
/*
 * Names of the headers whose values check_header() collects.  The
 * unused trailing slots are NULL, which is what terminates the loops
 * that walk this table.
 */
static const char *header[MAX_HDR_PARSED] = {
        "From","Subject","Date",
};
295
296 static inline int cmp_header(const struct strbuf *line, const char *hdr)
297 {
298         int len = strlen(hdr);
299         return !strncasecmp(line->buf, hdr, len) && line->len > len &&
300                         line->buf[len] == ':' && isspace(line->buf[len + 1]);
301 }
302
/*
 * Return non-zero if the len-byte line has the shape of the "From <40 hex
 * digits> Mon Sep 17 00:00:00 2001" separator line: same length as the
 * sample, "From " followed by exactly 40 lowercase hex digits, and a tail
 * identical to the sample's.
 */
static int is_format_patch_separator(const char *line, int len)
{
        static const char SAMPLE[] =
                "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
        const size_t sample_len = strlen(SAMPLE);
        const char *cp;
        size_t consumed;

        if (len != sample_len)
                return 0;
        if (!skip_prefix(line, "From ", &cp))
                return 0;
        if (strspn(cp, "0123456789abcdef") != 40)
                return 0;

        /* Everything after the object name must match the sample. */
        cp += 40;
        consumed = cp - line;
        return !memcmp(SAMPLE + consumed, cp, sample_len - consumed);
}
318
319 static struct strbuf *decode_q_segment(const struct strbuf *q_seg, int rfc2047)
320 {
321         const char *in = q_seg->buf;
322         int c;
323         struct strbuf *out = xmalloc(sizeof(struct strbuf));
324         strbuf_init(out, q_seg->len);
325
326         while ((c = *in++) != 0) {
327                 if (c == '=') {
328                         int d = *in++;
329                         if (d == '\n' || !d)
330                                 break; /* drop trailing newline */
331                         strbuf_addch(out, (hexval(d) << 4) | hexval(*in++));
332                         continue;
333                 }
334                 if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
335                         c = 0x20;
336                 strbuf_addch(out, c);
337         }
338         return out;
339 }
340
341 static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
342 {
343         /* Decode in..ep, possibly in-place to ot */
344         int c, pos = 0, acc = 0;
345         const char *in = b_seg->buf;
346         struct strbuf *out = xmalloc(sizeof(struct strbuf));
347         strbuf_init(out, b_seg->len);
348
349         while ((c = *in++) != 0) {
350                 if (c == '+')
351                         c = 62;
352                 else if (c == '/')
353                         c = 63;
354                 else if ('A' <= c && c <= 'Z')
355                         c -= 'A';
356                 else if ('a' <= c && c <= 'z')
357                         c -= 'a' - 26;
358                 else if ('0' <= c && c <= '9')
359                         c -= '0' - 52;
360                 else
361                         continue; /* garbage */
362                 switch (pos++) {
363                 case 0:
364                         acc = (c << 2);
365                         break;
366                 case 1:
367                         strbuf_addch(out, (acc | (c >> 4)));
368                         acc = (c & 15) << 4;
369                         break;
370                 case 2:
371                         strbuf_addch(out, (acc | (c >> 2)));
372                         acc = (c & 3) << 6;
373                         break;
374                 case 3:
375                         strbuf_addch(out, (acc | c));
376                         acc = pos = 0;
377                         break;
378                 }
379         }
380         return out;
381 }
382
383 static void convert_to_utf8(struct strbuf *line, const char *charset)
384 {
385         char *out;
386
387         if (!charset || !*charset)
388                 return;
389
390         if (same_encoding(metainfo_charset, charset))
391                 return;
392         out = reencode_string(line->buf, metainfo_charset, charset);
393         if (!out)
394                 die("cannot convert from %s to %s",
395                     charset, metainfo_charset);
396         strbuf_attach(line, out, strlen(out), strlen(out));
397 }
398
/*
 * Decode the "=?charset?B?...?=" and "=?charset?Q?...?=" encoded-words
 * found in the header value "it", in place.  Each decoded piece is
 * converted to metainfo_charset when that is set.  If a malformed
 * encoded-word is met, the function bails out and leaves "it" as it was.
 */
static void decode_header(struct strbuf *it)
{
        char *in, *ep, *cp;
        struct strbuf outbuf = STRBUF_INIT, *dec;
        struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;

        in = it->buf;
        while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
                int encoding;
                strbuf_reset(&charset_q);
                strbuf_reset(&piecebuf);

                if (in != ep) {
                        /*
                         * We are about to process an encoded-word
                         * that begins at ep, but there is something
                         * before the encoded word.
                         */
                        char *scan;
                        for (scan = in; scan < ep; scan++)
                                if (!isspace(*scan))
                                        break;

                        if (scan != ep || in == it->buf) {
                                /*
                                 * We should not lose that "something",
                                 * unless we have just processed an
                                 * encoded-word, and there is only LWS
                                 * before the one we are about to process.
                                 */
                                strbuf_add(&outbuf, in, ep - in);
                        }
                }
                /* E.g.
                 * ep : "=?iso-2022-jp?B?GyR...?= foo"
                 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
                 */
                ep += 2;

                /* cp: the '?' that ends the charset name */
                if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
                        goto release_return;

                /* need room for the encoding letter and the '?' after it */
                if (cp + 3 - it->buf > it->len)
                        goto release_return;
                strbuf_add(&charset_q, ep, cp - ep);

                encoding = cp[1];
                if (!encoding || cp[2] != '?')
                        goto release_return;
                /* ep: the "?=" that closes the encoded-word */
                ep = strstr(cp + 3, "?=");
                if (!ep)
                        goto release_return;
                strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
                switch (tolower(encoding)) {
                default:
                        goto release_return;
                case 'b':
                        dec = decode_b_segment(&piecebuf);
                        break;
                case 'q':
                        dec = decode_q_segment(&piecebuf, 1);
                        break;
                }
                if (metainfo_charset)
                        convert_to_utf8(dec, charset_q.buf);

                strbuf_addbuf(&outbuf, dec);
                strbuf_release(dec);
                free(dec);
                in = ep + 2;
        }
        /* copy whatever follows the last encoded-word, then replace "it" */
        strbuf_addstr(&outbuf, in);
        strbuf_reset(it);
        strbuf_addbuf(it, &outbuf);
release_return:
        strbuf_release(&outbuf);
        strbuf_release(&charset_q);
        strbuf_release(&piecebuf);
}
478
479 static int check_header(struct mailinfo *mi,
480                         const struct strbuf *line,
481                         struct strbuf *hdr_data[], int overwrite)
482 {
483         int i, ret = 0, len;
484         struct strbuf sb = STRBUF_INIT;
485
486         /* search for the interesting parts */
487         for (i = 0; header[i]; i++) {
488                 int len = strlen(header[i]);
489                 if ((!hdr_data[i] || overwrite) && cmp_header(line, header[i])) {
490                         /* Unwrap inline B and Q encoding, and optionally
491                          * normalize the meta information to utf8.
492                          */
493                         strbuf_add(&sb, line->buf + len + 2, line->len - len - 2);
494                         decode_header(&sb);
495                         handle_header(&hdr_data[i], &sb);
496                         ret = 1;
497                         goto check_header_out;
498                 }
499         }
500
501         /* Content stuff */
502         if (cmp_header(line, "Content-Type")) {
503                 len = strlen("Content-Type: ");
504                 strbuf_add(&sb, line->buf + len, line->len - len);
505                 decode_header(&sb);
506                 strbuf_insert(&sb, 0, "Content-Type: ", len);
507                 handle_content_type(&sb);
508                 ret = 1;
509                 goto check_header_out;
510         }
511         if (cmp_header(line, "Content-Transfer-Encoding")) {
512                 len = strlen("Content-Transfer-Encoding: ");
513                 strbuf_add(&sb, line->buf + len, line->len - len);
514                 decode_header(&sb);
515                 handle_content_transfer_encoding(&sb);
516                 ret = 1;
517                 goto check_header_out;
518         }
519         if (cmp_header(line, "Message-Id")) {
520                 len = strlen("Message-Id: ");
521                 strbuf_add(&sb, line->buf + len, line->len - len);
522                 decode_header(&sb);
523                 handle_message_id(mi, &sb);
524                 ret = 1;
525                 goto check_header_out;
526         }
527
528         /* for inbody stuff */
529         if (starts_with(line->buf, ">From") && isspace(line->buf[5])) {
530                 ret = is_format_patch_separator(line->buf + 1, line->len - 1);
531                 goto check_header_out;
532         }
533         if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
534                 for (i = 0; header[i]; i++) {
535                         if (!strcmp("Subject", header[i])) {
536                                 handle_header(&hdr_data[i], line);
537                                 ret = 1;
538                                 goto check_header_out;
539                         }
540                 }
541         }
542
543 check_header_out:
544         strbuf_release(&sb);
545         return ret;
546 }
547
548 static void decode_transfer_encoding(struct strbuf *line)
549 {
550         struct strbuf *ret;
551
552         switch (transfer_encoding) {
553         case TE_QP:
554                 ret = decode_q_segment(line, 0);
555                 break;
556         case TE_BASE64:
557                 ret = decode_b_segment(line);
558                 break;
559         case TE_DONTCARE:
560         default:
561                 return;
562         }
563         strbuf_reset(line);
564         strbuf_addbuf(line, ret);
565         strbuf_release(ret);
566         free(ret);
567 }
568
569 static inline int patchbreak(const struct strbuf *line)
570 {
571         size_t i;
572
573         /* Beginning of a "diff -" header? */
574         if (starts_with(line->buf, "diff -"))
575                 return 1;
576
577         /* CVS "Index: " line? */
578         if (starts_with(line->buf, "Index: "))
579                 return 1;
580
581         /*
582          * "--- <filename>" starts patches without headers
583          * "---<sp>*" is a manual separator
584          */
585         if (line->len < 4)
586                 return 0;
587
588         if (starts_with(line->buf, "---")) {
589                 /* space followed by a filename? */
590                 if (line->buf[3] == ' ' && !isspace(line->buf[4]))
591                         return 1;
592                 /* Just whitespace? */
593                 for (i = 3; i < line->len; i++) {
594                         unsigned char c = line->buf[i];
595                         if (c == '\n')
596                                 return 1;
597                         if (!isspace(c))
598                                 break;
599                 }
600                 return 0;
601         }
602         return 0;
603 }
604
/*
 * Return non-zero if line looks like a scissors mark such as
 * "-- >8 --": it must contain at least one scissors token (">8", "8<",
 * ">%" or "%<") and satisfy the proportions spelled out before the
 * return statement.
 */
static int is_scissors_line(const struct strbuf *line)
{
        size_t i, len = line->len;
        /* scissors: bytes in scissors tokens; gap: blanks inside a perforation */
        int scissors = 0, gap = 0;
        int first_nonblank = -1;
        /* perforation: dashes, scissors tokens and the blanks among them */
        int last_nonblank = 0, visible, perforation = 0, in_perforation = 0;
        const char *buf = line->buf;

        for (i = 0; i < len; i++) {
                if (isspace(buf[i])) {
                        /* blanks only count while inside a perforation */
                        if (in_perforation) {
                                perforation++;
                                gap++;
                        }
                        continue;
                }
                last_nonblank = i;
                if (first_nonblank < 0)
                        first_nonblank = i;
                if (buf[i] == '-') {
                        in_perforation = 1;
                        perforation++;
                        continue;
                }
                if (i + 1 < len &&
                    (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2) ||
                     !memcmp(buf + i, ">%", 2) || !memcmp(buf + i, "%<", 2))) {
                        in_perforation = 1;
                        perforation += 2;
                        scissors += 2;
                        i++; /* skip the second byte of the token */
                        continue;
                }
                /* any other visible byte ends the current perforation */
                in_perforation = 0;
        }

        /*
         * The mark must be at least 8 bytes long (e.g. "-- >8 --").
         * Even though there can be arbitrary cruft on the same line
         * (e.g. "cut here"), in order to avoid misidentification, the
         * perforation must occupy more than a third of the visible
         * width of the line, and dashes and scissors must occupy more
         * than half of the perforation.
         */

        visible = last_nonblank - first_nonblank + 1;
        return (scissors && 8 <= visible &&
                visible < perforation * 3 &&
                gap * 2 < perforation);
}
655
656 static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
657 {
658         if (!cmitmsg)
659                 return 0;
660
661         if (mi->header_stage) {
662                 if (!line->len || (line->len == 1 && line->buf[0] == '\n'))
663                         return 0;
664         }
665
666         if (mi->use_inbody_headers && mi->header_stage) {
667                 mi->header_stage = check_header(mi, line, s_hdr_data, 0);
668                 if (mi->header_stage)
669                         return 0;
670         } else
671                 /* Only trim the first (blank) line of the commit message
672                  * when ignoring in-body headers.
673                  */
674                 mi->header_stage = 0;
675
676         /* normalize the log message to UTF-8. */
677         if (metainfo_charset)
678                 convert_to_utf8(line, charset.buf);
679
680         if (mi->use_scissors && is_scissors_line(line)) {
681                 int i;
682                 if (fseek(cmitmsg, 0L, SEEK_SET))
683                         die_errno("Could not rewind output message file");
684                 if (ftruncate(fileno(cmitmsg), 0))
685                         die_errno("Could not truncate output message file at scissors");
686                 mi->header_stage = 1;
687
688                 /*
689                  * We may have already read "secondary headers"; purge
690                  * them to give ourselves a clean restart.
691                  */
692                 for (i = 0; header[i]; i++) {
693                         if (s_hdr_data[i])
694                                 strbuf_release(s_hdr_data[i]);
695                         s_hdr_data[i] = NULL;
696                 }
697                 return 0;
698         }
699
700         if (patchbreak(line)) {
701                 if (mi->message_id)
702                         fprintf(cmitmsg, "Message-Id: %s\n", mi->message_id);
703                 fclose(cmitmsg);
704                 cmitmsg = NULL;
705                 return 1;
706         }
707
708         fputs(line->buf, cmitmsg);
709         return 0;
710 }
711
712 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
713 {
714         fwrite(line->buf, 1, line->len, patchfile);
715         mi->patch_lines++;
716 }
717
718 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
719 {
720         switch (mi->filter_stage) {
721         case 0:
722                 if (!handle_commit_msg(mi, line))
723                         break;
724                 mi->filter_stage++;
725         case 1:
726                 handle_patch(mi, line);
727                 break;
728         }
729 }
730
731 static int is_rfc2822_header(const struct strbuf *line)
732 {
733         /*
734          * The section that defines the loosest possible
735          * field name is "3.6.8 Optional fields".
736          *
737          * optional-field = field-name ":" unstructured CRLF
738          * field-name = 1*ftext
739          * ftext = %d33-57 / %59-126
740          */
741         int ch;
742         char *cp = line->buf;
743
744         /* Count mbox From headers as headers */
745         if (starts_with(cp, "From ") || starts_with(cp, ">From "))
746                 return 1;
747
748         while ((ch = *cp++)) {
749                 if (ch == ':')
750                         return 1;
751                 if ((33 <= ch && ch <= 57) ||
752                     (59 <= ch && ch <= 126))
753                         continue;
754                 break;
755         }
756         return 0;
757 }
758
759 static int read_one_header_line(struct strbuf *line, FILE *in)
760 {
761         struct strbuf continuation = STRBUF_INIT;
762
763         /* Get the first part of the line. */
764         if (strbuf_getline(line, in, '\n'))
765                 return 0;
766
767         /*
768          * Is it an empty line or not a valid rfc2822 header?
769          * If so, stop here, and return false ("not a header")
770          */
771         strbuf_rtrim(line);
772         if (!line->len || !is_rfc2822_header(line)) {
773                 /* Re-add the newline */
774                 strbuf_addch(line, '\n');
775                 return 0;
776         }
777
778         /*
779          * Now we need to eat all the continuation lines..
780          * Yuck, 2822 header "folding"
781          */
782         for (;;) {
783                 int peek;
784
785                 peek = fgetc(in); ungetc(peek, in);
786                 if (peek != ' ' && peek != '\t')
787                         break;
788                 if (strbuf_getline(&continuation, in, '\n'))
789                         break;
790                 continuation.buf[0] = ' ';
791                 strbuf_rtrim(&continuation);
792                 strbuf_addbuf(line, &continuation);
793         }
794         strbuf_release(&continuation);
795
796         return 1;
797 }
798
799 static int find_boundary(struct mailinfo *mi, struct strbuf *line)
800 {
801         while (!strbuf_getline(line, mi->input, '\n')) {
802                 if (*content_top && is_multipart_boundary(line))
803                         return 1;
804         }
805         return 0;
806 }
807
/*
 * Handle a multipart boundary line held in "line".
 *
 * An end boundary (the boundary string followed by "--") pops the
 * boundary stack, passes a newline to handle_filter() and skips ahead to
 * the next boundary.  For a part-opening boundary, the transfer encoding
 * and charset are reset, the part's headers are read, and the first body
 * line of the part is loaded into "line".
 *
 * Returns 1 with "line" ready for processing, or 0 when the input ran out.
 */
static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
{
        struct strbuf newline = STRBUF_INIT;

        strbuf_addch(&newline, '\n');
again:
        if (line->len >= (*content_top)->len + 2 &&
            !memcmp(line->buf + (*content_top)->len, "--", 2)) {
                /* we hit an end boundary */
                /* pop the current boundary off the stack */
                strbuf_release(*content_top);
                free(*content_top);
                *content_top = NULL;

                /* technically won't happen as is_multipart_boundary()
                   will fail first.  But just in case..
                 */
                if (--content_top < content) {
                        fprintf(stderr, "Detected mismatched boundaries, "
                                        "can't recover\n");
                        exit(1);
                }
                handle_filter(mi, &newline);
                /*
                 * NOTE(review): newline is released here, so if the
                 * "goto again" below lands on another end boundary,
                 * handle_filter() is given an empty buffer rather than
                 * "\n" -- confirm whether that is intended.
                 */
                strbuf_release(&newline);

                /* skip to the next boundary */
                if (!find_boundary(mi, line))
                        return 0;
                goto again;
        }

        /* set some defaults */
        transfer_encoding = TE_DONTCARE;
        strbuf_reset(&charset);

        /* slurp in this section's info */
        while (read_one_header_line(line, mi->input))
                check_header(mi, line, p_hdr_data, 0);

        strbuf_release(&newline);
        /* replenish line */
        if (strbuf_getline(line, mi->input, '\n'))
                return 0;
        strbuf_addch(line, '\n');
        return 1;
}
854
855 static void handle_body(struct mailinfo *mi, struct strbuf *line)
856 {
857         struct strbuf prev = STRBUF_INIT;
858
859         /* Skip up to the first boundary */
860         if (*content_top) {
861                 if (!find_boundary(mi, line))
862                         goto handle_body_out;
863         }
864
865         do {
866                 /* process any boundary lines */
867                 if (*content_top && is_multipart_boundary(line)) {
868                         /* flush any leftover */
869                         if (prev.len) {
870                                 handle_filter(mi, &prev);
871                                 strbuf_reset(&prev);
872                         }
873                         if (!handle_boundary(mi, line))
874                                 goto handle_body_out;
875                 }
876
877                 /* Unwrap transfer encoding */
878                 decode_transfer_encoding(line);
879
880                 switch (transfer_encoding) {
881                 case TE_BASE64:
882                 case TE_QP:
883                 {
884                         struct strbuf **lines, **it, *sb;
885
886                         /* Prepend any previous partial lines */
887                         strbuf_insert(line, 0, prev.buf, prev.len);
888                         strbuf_reset(&prev);
889
890                         /*
891                          * This is a decoded line that may contain
892                          * multiple new lines.  Pass only one chunk
893                          * at a time to handle_filter()
894                          */
895                         lines = strbuf_split(line, '\n');
896                         for (it = lines; (sb = *it); it++) {
897                                 if (*(it + 1) == NULL) /* The last line */
898                                         if (sb->buf[sb->len - 1] != '\n') {
899                                                 /* Partial line, save it for later. */
900                                                 strbuf_addbuf(&prev, sb);
901                                                 break;
902                                         }
903                                 handle_filter(mi, sb);
904                         }
905                         /*
906                          * The partial chunk is saved in "prev" and will be
907                          * appended by the next iteration of read_line_with_nul().
908                          */
909                         strbuf_list_free(lines);
910                         break;
911                 }
912                 default:
913                         handle_filter(mi, line);
914                 }
915
916         } while (!strbuf_getwholeline(line, mi->input, '\n'));
917
918 handle_body_out:
919         strbuf_release(&prev);
920 }
921
922 static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
923 {
924         const char *sp = data->buf;
925         while (1) {
926                 char *ep = strchr(sp, '\n');
927                 int len;
928                 if (!ep)
929                         len = strlen(sp);
930                 else
931                         len = ep - sp;
932                 fprintf(fout, "%s: %.*s\n", hdr, len, sp);
933                 if (!ep)
934                         break;
935                 sp = ep + 1;
936         }
937 }
938
939 static void handle_info(struct mailinfo *mi)
940 {
941         struct strbuf *hdr;
942         int i;
943
944         for (i = 0; header[i]; i++) {
945                 /* only print inbody headers if we output a patch file */
946                 if (mi->patch_lines && s_hdr_data[i])
947                         hdr = s_hdr_data[i];
948                 else if (p_hdr_data[i])
949                         hdr = p_hdr_data[i];
950                 else
951                         continue;
952
953                 if (!strcmp(header[i], "Subject")) {
954                         if (!mi->keep_subject) {
955                                 cleanup_subject(mi, hdr);
956                                 cleanup_space(hdr);
957                         }
958                         output_header_lines(mi->output, "Subject", hdr);
959                 } else if (!strcmp(header[i], "From")) {
960                         cleanup_space(hdr);
961                         handle_from(mi, hdr);
962                         fprintf(mi->output, "Author: %s\n", mi->name.buf);
963                         fprintf(mi->output, "Email: %s\n", mi->email.buf);
964                 } else {
965                         cleanup_space(hdr);
966                         fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
967                 }
968         }
969         fprintf(mi->output, "\n");
970 }
971
972 static int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
973 {
974         int peek;
975         struct strbuf line = STRBUF_INIT;
976
977         cmitmsg = fopen(msg, "w");
978         if (!cmitmsg) {
979                 perror(msg);
980                 return -1;
981         }
982         patchfile = fopen(patch, "w");
983         if (!patchfile) {
984                 perror(patch);
985                 fclose(cmitmsg);
986                 return -1;
987         }
988
989         p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*p_hdr_data));
990         s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(*s_hdr_data));
991
992         do {
993                 peek = fgetc(mi->input);
994         } while (isspace(peek));
995         ungetc(peek, mi->input);
996
997         /* process the email header */
998         while (read_one_header_line(&line, mi->input))
999                 check_header(mi, &line, p_hdr_data, 1);
1000
1001         handle_body(mi, &line);
1002         fclose(patchfile);
1003
1004         handle_info(mi);
1005         strbuf_release(&line);
1006         return 0;
1007 }
1008
1009 static int git_mailinfo_config(const char *var, const char *value, void *mi_)
1010 {
1011         struct mailinfo *mi = mi_;
1012
1013         if (!starts_with(var, "mailinfo."))
1014                 return git_default_config(var, value, NULL);
1015         if (!strcmp(var, "mailinfo.scissors")) {
1016                 mi->use_scissors = git_config_bool(var, value);
1017                 return 0;
1018         }
1019         /* perhaps others here */
1020         return 0;
1021 }
1022
1023 static void setup_mailinfo(struct mailinfo *mi)
1024 {
1025         memset(mi, 0, sizeof(*mi));
1026         strbuf_init(&mi->name, 0);
1027         strbuf_init(&mi->email, 0);
1028         mi->header_stage = 1;
1029         mi->use_inbody_headers = 1;
1030         git_config(git_mailinfo_config, &mi);
1031 }
1032
1033 static void clear_mailinfo(struct mailinfo *mi)
1034 {
1035         strbuf_release(&mi->name);
1036         strbuf_release(&mi->email);
1037         free(mi->message_id);
1038 }
1039
/* One-line synopsis shown by usage() when the command line is invalid. */
static const char mailinfo_usage[] =
        "git mailinfo [-k | -b] [-m | --message-id] "
        "[-u | --encoding=<encoding> | -n] "
        "[--scissors | --no-scissors] "
        "<msg> <patch> < mail >info";
1042
1043 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
1044 {
1045         const char *def_charset;
1046         struct mailinfo mi;
1047         int status;
1048
1049         /* NEEDSWORK: might want to do the optional .git/ directory
1050          * discovery
1051          */
1052         setup_mailinfo(&mi);
1053
1054         def_charset = get_commit_output_encoding();
1055         metainfo_charset = def_charset;
1056
1057         while (1 < argc && argv[1][0] == '-') {
1058                 if (!strcmp(argv[1], "-k"))
1059                         mi.keep_subject = 1;
1060                 else if (!strcmp(argv[1], "-b"))
1061                         mi.keep_non_patch_brackets_in_subject = 1;
1062                 else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
1063                         mi.add_message_id = 1;
1064                 else if (!strcmp(argv[1], "-u"))
1065                         metainfo_charset = def_charset;
1066                 else if (!strcmp(argv[1], "-n"))
1067                         metainfo_charset = NULL;
1068                 else if (starts_with(argv[1], "--encoding="))
1069                         metainfo_charset = argv[1] + 11;
1070                 else if (!strcmp(argv[1], "--scissors"))
1071                         mi.use_scissors = 1;
1072                 else if (!strcmp(argv[1], "--no-scissors"))
1073                         mi.use_scissors = 0;
1074                 else if (!strcmp(argv[1], "--no-inbody-headers"))
1075                         mi.use_inbody_headers = 0;
1076                 else
1077                         usage(mailinfo_usage);
1078                 argc--; argv++;
1079         }
1080
1081         if (argc != 3)
1082                 usage(mailinfo_usage);
1083
1084         mi.input = stdin;
1085         mi.output = stdout;
1086         status = !!mailinfo(&mi, argv[1], argv[2]);
1087         clear_mailinfo(&mi);
1088
1089         return status;
1090 }