Merge branch 'ls/maint-mailinfo-patch-label'
[git] / builtin-mailinfo.c
1 /*
2  * Another stupid program, this one parsing the headers of an
3  * email to figure out authorship and subject
4  */
5 #include "cache.h"
6 #include "builtin.h"
7 #include "utf8.h"
8
/*
 * Streams used while splitting a mail: cmitmsg receives the log
 * message, patchfile receives the patch, fin/fout are the mail being
 * read and the summary being written (all assigned in mailinfo()).
 */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Non-zero when the Subject must be emitted without cleanup ("-k"). */
static int keep_subject;
/* Charset the meta information is converted to; NULL disables conversion. */
static const char *metainfo_charset;
/* Current input line, shared by the header and body readers. */
static char line[1000];
/* Author name and address extracted from the From header. */
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part being read. */
static enum {
	TE_DONTCARE,
	TE_QP,
	TE_BASE64,
} transfer_encoding;

/* Whether the part being read declared a "text/" content type. */
static enum {
	TYPE_TEXT,
	TYPE_OTHER,
} message_type;

/* Charset named by the most recent Content-Type header (lower-cased). */
static char charset[256];
/* Number of chunks written to patchfile so far. */
static int patch_lines;
/* Header values taken from the mail headers (p_) and from the body (s_). */
static char **p_hdr_data;
static char **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
30
/*
 * Decide whether "name" looks like a usable author name.
 *
 * Returns "name" when it is between 3 and 60 characters long and
 * contains none of '@', '<' or '>'; otherwise returns "email" so the
 * caller falls back to the address.
 */
static char *sanity_check(char *name, char *email)
{
	size_t n = strlen(name);

	if (n < 3 || n > 60)
		return email;
	if (strpbrk(name, "@<>"))
		return email;
	return name;
}
40
41 static int bogus_from(char *line)
42 {
43         /* John Doe <johndoe> */
44         char *bra, *ket, *dst, *cp;
45
46         /* This is fallback, so do not bother if we already have an
47          * e-mail address.
48          */
49         if (*email)
50                 return 0;
51
52         bra = strchr(line, '<');
53         if (!bra)
54                 return 0;
55         ket = strchr(bra, '>');
56         if (!ket)
57                 return 0;
58
59         for (dst = email, cp = bra+1; cp < ket; )
60                 *dst++ = *cp++;
61         *dst = 0;
62         for (cp = line; isspace(*cp); cp++)
63                 ;
64         for (bra--; isspace(*bra); bra--)
65                 *bra = 0;
66         cp = sanity_check(cp, email);
67         strcpy(name, cp);
68         return 1;
69 }
70
71 static int handle_from(char *in_line)
72 {
73         char line[1000];
74         char *at;
75         char *dst;
76
77         strcpy(line, in_line);
78         at = strchr(line, '@');
79         if (!at)
80                 return bogus_from(line);
81
82         /*
83          * If we already have one email, don't take any confusing lines
84          */
85         if (*email && strchr(at+1, '@'))
86                 return 0;
87
88         /* Pick up the string around '@', possibly delimited with <>
89          * pair; that is the email part.  White them out while copying.
90          */
91         while (at > line) {
92                 char c = at[-1];
93                 if (isspace(c))
94                         break;
95                 if (c == '<') {
96                         at[-1] = ' ';
97                         break;
98                 }
99                 at--;
100         }
101         dst = email;
102         for (;;) {
103                 unsigned char c = *at;
104                 if (!c || c == '>' || isspace(c)) {
105                         if (c == '>')
106                                 *at = ' ';
107                         break;
108                 }
109                 *at++ = ' ';
110                 *dst++ = c;
111         }
112         *dst++ = 0;
113
114         /* The remainder is name.  It could be "John Doe <john.doe@xz>"
115          * or "john.doe@xz (John Doe)", but we have whited out the
116          * email part, so trim from both ends, possibly removing
117          * the () pair at the end.
118          */
119         at = line + strlen(line);
120         while (at > line) {
121                 unsigned char c = *--at;
122                 if (!isspace(c)) {
123                         at[(c == ')') ? 0 : 1] = 0;
124                         break;
125                 }
126         }
127
128         at = line;
129         for (;;) {
130                 unsigned char c = *at;
131                 if (!c || !isspace(c)) {
132                         if (c == '(')
133                                 at++;
134                         break;
135                 }
136                 at++;
137         }
138         at = sanity_check(at, email);
139         strcpy(name, at);
140         return 1;
141 }
142
/*
 * Copy the part of "line" starting at offset "ofs" into "data".
 *
 * Returns 0 on success and 1 when either pointer is NULL.  The copy is
 * unbounded; the caller must provide a large enough "data".
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152
153 /* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
154  * to have enough heuristics to grok MIME encoded patches often found
155  * on our mailing lists.  For example, we do not even treat header lines
156  * case insensitively.
157  */
158
159 static int slurp_attr(const char *line, const char *name, char *attr)
160 {
161         const char *ends, *ap = strcasestr(line, name);
162         size_t sz;
163
164         if (!ap) {
165                 *attr = 0;
166                 return 0;
167         }
168         ap += strlen(name);
169         if (*ap == '"') {
170                 ap++;
171                 ends = "\"";
172         }
173         else
174                 ends = "; \t";
175         sz = strcspn(ap, ends);
176         memcpy(attr, ap, sz);
177         attr[sz] = 0;
178         return 1;
179 }
180
181 struct content_type {
182         char *boundary;
183         int boundary_len;
184 };
185
186 static struct content_type content[MAX_BOUNDARIES];
187
188 static struct content_type *content_top = content;
189
190 static int handle_content_type(char *line)
191 {
192         char boundary[256];
193
194         if (strcasestr(line, "text/") == NULL)
195                  message_type = TYPE_OTHER;
196         if (slurp_attr(line, "boundary=", boundary + 2)) {
197                 memcpy(boundary, "--", 2);
198                 if (content_top++ >= &content[MAX_BOUNDARIES]) {
199                         fprintf(stderr, "Too many boundaries to handle\n");
200                         exit(1);
201                 }
202                 content_top->boundary_len = strlen(boundary);
203                 content_top->boundary = xmalloc(content_top->boundary_len+1);
204                 strcpy(content_top->boundary, boundary);
205         }
206         if (slurp_attr(line, "charset=", charset)) {
207                 int i, c;
208                 for (i = 0; (c = charset[i]) != 0; i++)
209                         charset[i] = tolower(c);
210         }
211         return 0;
212 }
213
214 static int handle_content_transfer_encoding(char *line)
215 {
216         if (strcasestr(line, "base64"))
217                 transfer_encoding = TE_BASE64;
218         else if (strcasestr(line, "quoted-printable"))
219                 transfer_encoding = TE_QP;
220         else
221                 transfer_encoding = TE_DONTCARE;
222         return 0;
223 }
224
225 static int is_multipart_boundary(const char *line)
226 {
227         return (!memcmp(line, content_top->boundary, content_top->boundary_len));
228 }
229
/*
 * Strip trailing whitespace from "line" in place.
 * Returns the length of what remains.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = 0;
	return end - line;
}
237
/*
 * Strip mailing-list decoration from the front of a subject line.
 *
 * Repeatedly removes leading blanks, tabs and colons, "Re:"/"re:"
 * prefixes, and bracketed tags such as "[PATCH 1/2]".  A bracketed tag
 * is only dropped when it is at most twice as long as the text from
 * its ']' onwards; a '[' with no matching ']' is dropped on its own.
 * Trailing whitespace is trimmed in place.  Returns a pointer into the
 * original buffer.
 */
static char *cleanup_subject(char *subject)
{
	int stripping = 1;
	size_t n;

	while (stripping) {
		char *close;
		int after, before;

		switch (*subject) {
		case ' ': case '\t': case ':':
			subject++;
			break;
		case 'r': case 'R':
			if (memcmp("e:", subject + 1, 2))
				stripping = 0;
			else
				subject += 3;
			break;
		case '[':
			close = strchr(subject, ']');
			if (!close) {
				subject++;
				break;
			}
			after = strlen(close);
			before = close - subject;
			if (before <= after * 2)
				subject = close + 1;
			else
				stripping = 0;
			break;
		default:
			stripping = 0;
			break;
		}
	}

	/* drop trailing whitespace */
	n = strlen(subject);
	while (n > 0 && isspace(subject[n - 1]))
		subject[--n] = 0;
	return subject;
}
272
/*
 * Collapse every run of whitespace in "buf" into a single space, in
 * place.  Leading and trailing runs are reduced to one space as well,
 * not removed.
 *
 * Done in one pass with separate read and write cursors, so the cost
 * is linear in the length of the string.
 */
static void cleanup_space(char *buf)
{
	const char *src = buf;
	char *dst = buf;

	while (*src) {
		unsigned char c = *src++;

		if (!isspace(c)) {
			*dst++ = c;
			continue;
		}
		*dst++ = ' ';
		/* swallow the rest of the run */
		while (isspace((unsigned char)*src))
			src++;
	}
	*dst = 0;
}
289
290 static void decode_header(char *it, unsigned itsize);
291 static const char *header[MAX_HDR_PARSED] = {
292         "From","Subject","Date",
293 };
294
295 static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
296 {
297         int i;
298
299         /* search for the interesting parts */
300         for (i = 0; header[i]; i++) {
301                 int len = strlen(header[i]);
302                 if ((!hdr_data[i] || overwrite) &&
303                     !strncasecmp(line, header[i], len) &&
304                     line[len] == ':' && isspace(line[len + 1])) {
305                         /* Unwrap inline B and Q encoding, and optionally
306                          * normalize the meta information to utf8.
307                          */
308                         decode_header(line + len + 2, linesize - len - 2);
309                         hdr_data[i] = xmalloc(1000 * sizeof(char));
310                         if (! handle_header(line, hdr_data[i], len + 2)) {
311                                 return 1;
312                         }
313                 }
314         }
315
316         /* Content stuff */
317         if (!strncasecmp(line, "Content-Type", 12) &&
318                 line[12] == ':' && isspace(line[12 + 1])) {
319                 decode_header(line + 12 + 2, linesize - 12 - 2);
320                 if (! handle_content_type(line)) {
321                         return 1;
322                 }
323         }
324         if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
325                 line[25] == ':' && isspace(line[25 + 1])) {
326                 decode_header(line + 25 + 2, linesize - 25 - 2);
327                 if (! handle_content_transfer_encoding(line)) {
328                         return 1;
329                 }
330         }
331
332         /* for inbody stuff */
333         if (!memcmp(">From", line, 5) && isspace(line[5]))
334                 return 1;
335         if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
336                 for (i = 0; header[i]; i++) {
337                         if (!memcmp("Subject", header[i], 7)) {
338                                 if (!hdr_data[i])
339                                         hdr_data[i] = xmalloc(linesize + 20);
340                                 if (! handle_header(line, hdr_data[i], 0)) {
341                                         return 1;
342                                 }
343                         }
344                 }
345         }
346
347         /* no match */
348         return 0;
349 }
350
/*
 * Does "line" look like a header line?
 *
 * mbox "From " and ">From " lines count as headers.  Otherwise the
 * loosest rule of RFC 2822 ("3.6.8 Optional fields") is applied:
 *
 *   optional-field = field-name ":" unstructured CRLF
 *   field-name = 1*ftext
 *   ftext = %d33-57 / %59-126
 *
 * so the line must start with at least one ftext character followed by
 * a colon.  Returns 1 for a header, 0 otherwise.
 */
static int is_rfc2822_header(char *line)
{
	int ch;
	char *cp = line;

	/* Count mbox From headers as headers */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/* cp is already past the colon; an empty name is no header */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
378
/*
 * Read one logical header line from "in" into "line".
 *
 * sz is the size of the "line" buffer in bytes; it must be reasonably
 * long enough to hold one physical real-world e-mail line.
 *
 * Returns 1 when a header was read; folded continuation lines are
 * appended, each introduced by a '\n' that replaces its leading
 * whitespace character.  Returns 0 at end of input, or when the line
 * read is empty or not a header; in the latter case the line is left
 * in "line" with trailing whitespace replaced by a single newline.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char continuation[1000];
	int len;

	/*
	 * Keep one byte in reserve: everything below works with the
	 * reduced size, so line[sz] can still be written safely.
	 */
	sz--;

	/* Get the first part of the line. */
	if (!fgets(line, sz, in))
		return 0;

	len = eatspace(line);
	if (len == 0 || !is_rfc2822_header(line)) {
		/* Not a header: give the line back with its newline. */
		line[len] = '\n';
		line[len + 1] = '\0';
		return 0;
	}

	/* Now eat all the continuation lines (2822 header "folding"). */
	for (;;) {
		int addlen;
		int peek = fgetc(in);

		ungetc(peek, in);
		if (peek != ' ' && peek != '\t')
			break;
		if (!fgets(continuation, sizeof(continuation), in))
			break;
		addlen = eatspace(continuation);
		if (len >= sz - 1)
			continue;	/* no room left; drop the text */
		if (addlen >= sz - len)
			addlen = sz - len - 1;
		memcpy(line + len, continuation, addlen);
		/* the fold's leading whitespace becomes a line break */
		line[len] = '\n';
		len += addlen;
	}
	line[len] = 0;

	return 1;
}
436
/*
 * Decode quoted-printable text.
 *
 * in      - input; decoding stops at a NUL or once "in" has passed "ep"
 * ot      - output buffer (may be the same buffer as "in")
 * otsize  - size of the output buffer in bytes, including the NUL
 * ep      - end of the input segment
 * rfc2047 - when set, '_' decodes to a space (RFC 2047 4.2 (2))
 *
 * Returns the number of bytes written, not counting the terminating
 * NUL, or -1 when the output does not fit (the output is then left
 * truncated and NUL-terminated).  A '=' followed by a newline or the
 * end of the string ends decoding (soft line break).
 */
static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
	char *otbegin = ot;
	char *otend = ot + otsize;
	int c;

	if (!otsize)
		return -1;	/* not even room for the terminator */
	while ((c = *in++) != 0 && (in <= ep)) {
		/* always keep one byte free for the terminating NUL */
		if (ot + 1 >= otend) {
			*ot = '\0';
			return -1;
		}
		if (c == '=') {
			int hi = *in++;
			int lo;
			if (hi == '\n' || !hi)
				break; /* drop trailing newline */
			lo = *in;
			if (!lo)
				break; /* escape cut short by end of string */
			in++;
			*ot++ = ((hexval((unsigned char)hi) << 4) |
				 hexval((unsigned char)lo));
			continue;
		}
		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;
		*ot++ = c;
	}
	*ot = 0;
	return (ot - otbegin);
}
461
/*
 * Decode base64 text.
 *
 * in     - input; decoding stops at a NUL or once "in" has passed "ep"
 * ot     - output buffer (may be the same buffer as "in")
 * otsize - size of the output buffer in bytes, including the NUL
 * ep     - end of the input segment
 *
 * Characters outside the base64 alphabet are skipped.  Padding ('=')
 * is treated as a zero sextet, so padded input yields trailing NUL
 * bytes that are counted in the result.
 *
 * Returns the number of bytes written, not counting the terminating
 * NUL, or -1 when the output does not fit (the output is then left
 * truncated and NUL-terminated).
 */
static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
	/* Decode in..ep, possibly in-place to ot */
	int c, pos = 0, acc = 0;
	char *otbegin = ot;
	char *otend = ot + otsize;

	if (!otsize)
		return -1;	/* not even room for the terminator */
	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '+')
			c = 62;
		else if (c == '/')
			c = 63;
		else if ('A' <= c && c <= 'Z')
			c -= 'A';
		else if ('a' <= c && c <= 'z')
			c -= 'a' - 26;
		else if ('0' <= c && c <= '9')
			c -= '0' - 52;
		else if (c == '=') {
			/* padding is almost like (c == 0), except we do
			 * not output NUL resulting only from it;
			 * for now we just trust the data.
			 */
			c = 0;
		}
		else
			continue; /* garbage */

		if (pos == 0) {
			/* first sextet of a group: nothing to emit yet */
			acc = (c << 2);
			pos = 1;
			continue;
		}

		/* every other sextet completes a byte; keep room for the NUL */
		if (ot + 1 >= otend) {
			*ot = '\0';
			return -1;
		}
		switch (pos) {
		case 1:
			*ot++ = (acc | (c >> 4));
			acc = (c & 15) << 4;
			pos = 2;
			break;
		case 2:
			*ot++ = (acc | (c >> 2));
			acc = (c & 3) << 6;
			pos = 3;
			break;
		default:
			*ot++ = (acc | c);
			acc = pos = 0;
			break;
		}
	}
	*ot = 0;
	return (ot - otbegin);
}
514
/*
 * Guess the charset of "line" when none was declared.
 *
 * If the target charset is UTF-8 and the text already is valid UTF-8
 * (US-ASCII included), there is nothing to convert and NULL is
 * returned.  In every other case the text is assumed to be Latin-1,
 * for historical reasons.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line))
		return NULL;
	return "latin1";
}
534
535 static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
536 {
537         char *out;
538
539         if (!charset || !*charset) {
540                 charset = guess_charset(line, metainfo_charset);
541                 if (!charset)
542                         return;
543         }
544
545         if (!strcmp(metainfo_charset, charset))
546                 return;
547         out = reencode_string(line, metainfo_charset, charset);
548         if (!out)
549                 die("cannot convert from %s to %s\n",
550                     charset, metainfo_charset);
551         strlcpy(line, out, linesize);
552         free(out);
553 }
554
555 static int decode_header_bq(char *it, unsigned itsize)
556 {
557         char *in, *out, *ep, *cp, *sp;
558         char outbuf[1000];
559         int rfc2047 = 0;
560
561         in = it;
562         out = outbuf;
563         while ((ep = strstr(in, "=?")) != NULL) {
564                 int sz, encoding;
565                 char charset_q[256], piecebuf[256];
566                 rfc2047 = 1;
567
568                 if (in != ep) {
569                         sz = ep - in;
570                         memcpy(out, in, sz);
571                         out += sz;
572                         in += sz;
573                 }
574                 /* E.g.
575                  * ep : "=?iso-2022-jp?B?GyR...?= foo"
576                  * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
577                  */
578                 ep += 2;
579                 cp = strchr(ep, '?');
580                 if (!cp)
581                         return rfc2047; /* no munging */
582                 for (sp = ep; sp < cp; sp++)
583                         charset_q[sp - ep] = tolower(*sp);
584                 charset_q[cp - ep] = 0;
585                 encoding = cp[1];
586                 if (!encoding || cp[2] != '?')
587                         return rfc2047; /* no munging */
588                 ep = strstr(cp + 3, "?=");
589                 if (!ep)
590                         return rfc2047; /* no munging */
591                 switch (tolower(encoding)) {
592                 default:
593                         return rfc2047; /* no munging */
594                 case 'b':
595                         sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
596                         break;
597                 case 'q':
598                         sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
599                         break;
600                 }
601                 if (sz < 0)
602                         return rfc2047;
603                 if (metainfo_charset)
604                         convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
605
606                 sz = strlen(piecebuf);
607                 if (outbuf + sizeof(outbuf) <= out + sz)
608                         return rfc2047; /* no munging */
609                 strcpy(out, piecebuf);
610                 out += sz;
611                 in = ep + 2;
612         }
613         strcpy(out, in);
614         strlcpy(it, outbuf, itsize);
615         return rfc2047;
616 }
617
618 static void decode_header(char *it, unsigned itsize)
619 {
620
621         if (decode_header_bq(it, itsize))
622                 return;
623         /* otherwise "it" is a straight copy of the input.
624          * This can be binary guck but there is no charset specified.
625          */
626         if (metainfo_charset)
627                 convert_to_utf8(it, itsize, "");
628 }
629
630 static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen)
631 {
632         char *ep;
633
634         switch (transfer_encoding) {
635         case TE_QP:
636                 ep = line + inputlen;
637                 return decode_q_segment(line, line, linesize, ep, 0);
638         case TE_BASE64:
639                 ep = line + inputlen;
640                 return decode_b_segment(line, line, linesize, ep);
641         case TE_DONTCARE:
642         default:
643                 return inputlen;
644         }
645 }
646
647 static int handle_filter(char *line, unsigned linesize, int linelen);
648
649 static int find_boundary(void)
650 {
651         while(fgets(line, sizeof(line), fin) != NULL) {
652                 if (is_multipart_boundary(line))
653                         return 1;
654         }
655         return 0;
656 }
657
658 static int handle_boundary(void)
659 {
660         char newline[]="\n";
661 again:
662         if (!memcmp(line+content_top->boundary_len, "--", 2)) {
663                 /* we hit an end boundary */
664                 /* pop the current boundary off the stack */
665                 free(content_top->boundary);
666
667                 /* technically won't happen as is_multipart_boundary()
668                    will fail first.  But just in case..
669                  */
670                 if (content_top-- < content) {
671                         fprintf(stderr, "Detected mismatched boundaries, "
672                                         "can't recover\n");
673                         exit(1);
674                 }
675                 handle_filter(newline, sizeof(newline), strlen(newline));
676
677                 /* skip to the next boundary */
678                 if (!find_boundary())
679                         return 0;
680                 goto again;
681         }
682
683         /* set some defaults */
684         transfer_encoding = TE_DONTCARE;
685         charset[0] = 0;
686         message_type = TYPE_TEXT;
687
688         /* slurp in this section's info */
689         while (read_one_header_line(line, sizeof(line), fin))
690                 check_header(line, sizeof(line), p_hdr_data, 0);
691
692         /* eat the blank line after section info */
693         return (fgets(line, sizeof(line), fin) != NULL);
694 }
695
/*
 * Does "line" mark the end of the commit message and the start of the
 * patch?  Returns 1 for "diff -..." headers, CVS "Index: " lines,
 * "--- <filename>" lines and a "---" separator followed only by
 * whitespace up to the newline; 0 otherwise.
 *
 * Prefixes are compared with strncmp() so that a line shorter than
 * the prefix is never read past its terminating NUL.
 */
static inline int patchbreak(const char *line)
{
	/* Beginning of a "diff -" header? */
	if (!strncmp(line, "diff -", 6))
		return 1;

	/* CVS "Index: " line? */
	if (!strncmp(line, "Index: ", 7))
		return 1;

	/*
	 * "--- <filename>" starts patches without headers
	 * "---<sp>*" is a manual separator
	 */
	if (!strncmp(line, "---", 3)) {
		line += 3;
		/* space followed by a filename? */
		if (line[0] == ' ' && !isspace(line[1]))
			return 1;
		/* Just whitespace? */
		for (;;) {
			unsigned char c = *line++;
			if (c == '\n')
				return 1;
			if (!isspace(c))
				break;
		}
		return 0;
	}
	return 0;
}
727
728
729 static int handle_commit_msg(char *line, unsigned linesize)
730 {
731         static int still_looking = 1;
732         char *endline = line + linesize;
733
734         if (!cmitmsg)
735                 return 0;
736
737         if (still_looking) {
738                 char *cp = line;
739                 if (isspace(*line)) {
740                         for (cp = line + 1; *cp; cp++) {
741                                 if (!isspace(*cp))
742                                         break;
743                         }
744                         if (!*cp)
745                                 return 0;
746                 }
747                 if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
748                         return 0;
749         }
750
751         /* normalize the log message to UTF-8. */
752         if (metainfo_charset)
753                 convert_to_utf8(line, endline - line, charset);
754
755         if (patchbreak(line)) {
756                 fclose(cmitmsg);
757                 cmitmsg = NULL;
758                 return 1;
759         }
760
761         fputs(line, cmitmsg);
762         return 0;
763 }
764
765 static int handle_patch(char *line, int len)
766 {
767         fwrite(line, 1, len, patchfile);
768         patch_lines++;
769         return 0;
770 }
771
/*
 * Route one body line to the stage currently consuming the body.
 *
 * Stage 0 is the commit message and stage 1 the patch; the stage is
 * remembered between calls.  When a handler reports that it hit its
 * end point, the same line is offered to the next stage right away.
 *
 * Returns 1 once every stage has finished, 0 otherwise.
 */
static int handle_filter(char *line, unsigned linesize, int linelen)
{
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line, linesize))
			return 0;
		filter = 1;
	}
	if (filter == 1) {
		if (!handle_patch(line, linelen))
			return 0;
		filter = 2;
	}
	return 1;
}
794
795 static void handle_body(void)
796 {
797         int rc = 0;
798         static char newline[2000];
799         static char *np = newline;
800         int len = strlen(line);
801
802         /* Skip up to the first boundary */
803         if (content_top->boundary) {
804                 if (!find_boundary())
805                         return;
806         }
807
808         do {
809                 /* process any boundary lines */
810                 if (content_top->boundary && is_multipart_boundary(line)) {
811                         /* flush any leftover */
812                         if (np != newline)
813                                 handle_filter(newline, sizeof(newline),
814                                               np - newline);
815                         if (!handle_boundary())
816                                 return;
817                         len = strlen(line);
818                 }
819
820                 /* Unwrap transfer encoding */
821                 len = decode_transfer_encoding(line, sizeof(line), len);
822                 if (len < 0) {
823                         error("Malformed input line");
824                         return;
825                 }
826
827                 switch (transfer_encoding) {
828                 case TE_BASE64:
829                 case TE_QP:
830                 {
831                         char *op = line;
832
833                         /* binary data most likely doesn't have newlines */
834                         if (message_type != TYPE_TEXT) {
835                                 rc = handle_filter(line, sizeof(line), len);
836                                 break;
837                         }
838
839                         /*
840                          * This is a decoded line that may contain
841                          * multiple new lines.  Pass only one chunk
842                          * at a time to handle_filter()
843                          */
844                         do {
845                                 while (op < line + len && *op != '\n')
846                                         *np++ = *op++;
847                                 *np = *op;
848                                 if (*np != 0) {
849                                         /* should be sitting on a new line */
850                                         *(++np) = 0;
851                                         op++;
852                                         rc = handle_filter(newline, sizeof(newline), np - newline);
853                                         np = newline;
854                                 }
855                         } while (op < line + len);
856                         /*
857                          * The partial chunk is saved in newline and will be
858                          * appended by the next iteration of read_line_with_nul().
859                          */
860                         break;
861                 }
862                 default:
863                         rc = handle_filter(line, sizeof(line), len);
864                 }
865                 if (rc)
866                         /* nothing left to filter */
867                         break;
868         } while ((len = read_line_with_nul(line, sizeof(line), fin)));
869
870         return;
871 }
872
/*
 * Print a header value that may span several lines.  Each
 * '\n'-separated piece of "data" is written to "fout" as its own
 * "<hdr>: <piece>" line.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	char *nl;

	for (; (nl = strchr(data, '\n')) != NULL; data = nl + 1)
		fprintf(fout, "%s: %.*s\n", hdr, (int)(nl - data), data);
	/* the last (or only) piece has no newline after it */
	fprintf(fout, "%s: %.*s\n", hdr, (int)strlen(data), data);
}
888
889 static void handle_info(void)
890 {
891         char *sub;
892         char *hdr;
893         int i;
894
895         for (i = 0; header[i]; i++) {
896
897                 /* only print inbody headers if we output a patch file */
898                 if (patch_lines && s_hdr_data[i])
899                         hdr = s_hdr_data[i];
900                 else if (p_hdr_data[i])
901                         hdr = p_hdr_data[i];
902                 else
903                         continue;
904
905                 if (!memcmp(header[i], "Subject", 7)) {
906                         if (keep_subject)
907                                 sub = hdr;
908                         else {
909                                 sub = cleanup_subject(hdr);
910                                 cleanup_space(sub);
911                         }
912                         output_header_lines(fout, "Subject", sub);
913                 } else if (!memcmp(header[i], "From", 4)) {
914                         handle_from(hdr);
915                         fprintf(fout, "Author: %s\n", name);
916                         fprintf(fout, "Email: %s\n", email);
917                 } else {
918                         cleanup_space(hdr);
919                         fprintf(fout, "%s: %s\n", header[i], hdr);
920                 }
921         }
922         fprintf(fout, "\n");
923 }
924
925 static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
926                     const char *msg, const char *patch)
927 {
928         int peek;
929         keep_subject = ks;
930         metainfo_charset = encoding;
931         fin = in;
932         fout = out;
933
934         cmitmsg = fopen(msg, "w");
935         if (!cmitmsg) {
936                 perror(msg);
937                 return -1;
938         }
939         patchfile = fopen(patch, "w");
940         if (!patchfile) {
941                 perror(patch);
942                 fclose(cmitmsg);
943                 return -1;
944         }
945
946         p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
947         s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
948
949         do {
950                 peek = fgetc(in);
951         } while (isspace(peek));
952         ungetc(peek, in);
953
954         /* process the email header */
955         while (read_one_header_line(line, sizeof(line), fin))
956                 check_header(line, sizeof(line), p_hdr_data, 1);
957
958         handle_body();
959         handle_info();
960
961         return 0;
962 }
963
964 static const char mailinfo_usage[] =
965         "git-mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info";
966
967 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
968 {
969         const char *def_charset;
970
971         /* NEEDSWORK: might want to do the optional .git/ directory
972          * discovery
973          */
974         git_config(git_default_config, NULL);
975
976         def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
977         metainfo_charset = def_charset;
978
979         while (1 < argc && argv[1][0] == '-') {
980                 if (!strcmp(argv[1], "-k"))
981                         keep_subject = 1;
982                 else if (!strcmp(argv[1], "-u"))
983                         metainfo_charset = def_charset;
984                 else if (!strcmp(argv[1], "-n"))
985                         metainfo_charset = NULL;
986                 else if (!prefixcmp(argv[1], "--encoding="))
987                         metainfo_charset = argv[1] + 11;
988                 else
989                         usage(mailinfo_usage);
990                 argc--; argv++;
991         }
992
993         if (argc != 3)
994                 usage(mailinfo_usage);
995
996         return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]);
997 }