Documentation: fix graph in git-rev-parse.txt
[git] / builtin-mailinfo.c
1 /*
2  * Another stupid program, this one parsing the headers of an
3  * email to figure out authorship and subject
4  */
5 #include "cache.h"
6 #include "builtin.h"
7 #include "utf8.h"
8
/* Streams used by one run: commit message, patch, mail input, info output. */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Non-zero when the Subject is to be emitted without cleanup. */
static int keep_subject;
/* Charset the meta information is converted to; NULL disables conversion. */
static const char *metainfo_charset;
/* Shared line buffer for reading the mail. */
static char line[1000];
/* Author name and e-mail address picked up from the From header. */
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part being read. */
static enum {
	TE_DONTCARE,
	TE_QP,
	TE_BASE64
} transfer_encoding;

/* Whether the part being read is text/ or something else. */
static enum {
	TYPE_TEXT,
	TYPE_OTHER
} message_type;

/* Charset attribute of the part being read, lowercased. */
static char charset[256];
/* Number of lines written to the patch file so far. */
static int patch_lines;
/* Header values taken from the mail header and from the message body. */
static char **p_hdr_data;
static char **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
30
/*
 * Pick the string to use as the author name.
 *
 * "name" is accepted when it is between 3 and 60 bytes long and
 * contains none of '@', '<' or '>'; in every other case the
 * "email" pointer is returned instead.
 */
static char *sanity_check(char *name, char *email)
{
	int len = strlen(name);
	int plausible_length = (3 <= len && len <= 60);

	if (plausible_length && !strpbrk(name, "@<>"))
		return name;
	return email;
}
40
41 static int bogus_from(char *line)
42 {
43         /* John Doe <johndoe> */
44         char *bra, *ket, *dst, *cp;
45
46         /* This is fallback, so do not bother if we already have an
47          * e-mail address.
48          */
49         if (*email)
50                 return 0;
51
52         bra = strchr(line, '<');
53         if (!bra)
54                 return 0;
55         ket = strchr(bra, '>');
56         if (!ket)
57                 return 0;
58
59         for (dst = email, cp = bra+1; cp < ket; )
60                 *dst++ = *cp++;
61         *dst = 0;
62         for (cp = line; isspace(*cp); cp++)
63                 ;
64         for (bra--; isspace(*bra); bra--)
65                 *bra = 0;
66         cp = sanity_check(cp, email);
67         strcpy(name, cp);
68         return 1;
69 }
70
71 static int handle_from(char *in_line)
72 {
73         char line[1000];
74         char *at;
75         char *dst;
76
77         strcpy(line, in_line);
78         at = strchr(line, '@');
79         if (!at)
80                 return bogus_from(line);
81
82         /*
83          * If we already have one email, don't take any confusing lines
84          */
85         if (*email && strchr(at+1, '@'))
86                 return 0;
87
88         /* Pick up the string around '@', possibly delimited with <>
89          * pair; that is the email part.  White them out while copying.
90          */
91         while (at > line) {
92                 char c = at[-1];
93                 if (isspace(c))
94                         break;
95                 if (c == '<') {
96                         at[-1] = ' ';
97                         break;
98                 }
99                 at--;
100         }
101         dst = email;
102         for (;;) {
103                 unsigned char c = *at;
104                 if (!c || c == '>' || isspace(c)) {
105                         if (c == '>')
106                                 *at = ' ';
107                         break;
108                 }
109                 *at++ = ' ';
110                 *dst++ = c;
111         }
112         *dst++ = 0;
113
114         /* The remainder is name.  It could be "John Doe <john.doe@xz>"
115          * or "john.doe@xz (John Doe)", but we have whited out the
116          * email part, so trim from both ends, possibly removing
117          * the () pair at the end.
118          */
119         at = line + strlen(line);
120         while (at > line) {
121                 unsigned char c = *--at;
122                 if (!isspace(c)) {
123                         at[(c == ')') ? 0 : 1] = 0;
124                         break;
125                 }
126         }
127
128         at = line;
129         for (;;) {
130                 unsigned char c = *at;
131                 if (!c || !isspace(c)) {
132                         if (c == '(')
133                                 at++;
134                         break;
135                 }
136                 at++;
137         }
138         at = sanity_check(at, email);
139         strcpy(name, at);
140         return 1;
141 }
142
/*
 * Copy the part of "line" that starts at byte offset "ofs" into
 * "data".  Returns 0 after copying, or 1 without touching anything
 * when either pointer is NULL.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 * to have enough heuristics to grok MIME encoded patches often found
 * on our mailing lists.  For example, attributes such as "charset="
 * and "boundary=" are located by a plain substring search of the
 * header line rather than by parsing the parameter list.
 */
158
/*
 * Extract the value of attribute "name" (e.g. "charset=") from a
 * header line into "attr".
 *
 * The attribute is located with a case-insensitive substring search.
 * A value that starts with '"' runs up to the next '"'; otherwise it
 * runs up to the next ';', space or tab.  Returns 1 when the
 * attribute was found, or 0 (with "attr" set to the empty string)
 * when it was not.
 *
 * The interface carries no buffer size.  The callers in this file
 * pass at least 254 writable bytes (charset[256], and a 256-byte
 * boundary buffer offset by 2), so the value is truncated to 253
 * bytes plus the terminating NUL instead of being copied unbounded.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	enum { ATTR_VALUE_MAX = 253 };
	const char *ends, *ap = strcasestr(line, name);
	size_t sz;

	if (!ap) {
		*attr = 0;
		return 0;
	}
	ap += strlen(name);
	if (*ap == '"') {
		ap++;
		ends = "\"";
	}
	else
		ends = "; \t";
	sz = strcspn(ap, ends);
	if (sz > ATTR_VALUE_MAX)
		sz = ATTR_VALUE_MAX;
	memcpy(attr, ap, sz);
	attr[sz] = 0;
	return 1;
}
180
181 struct content_type {
182         char *boundary;
183         int boundary_len;
184 };
185
186 static struct content_type content[MAX_BOUNDARIES];
187
188 static struct content_type *content_top = content;
189
190 static int handle_content_type(char *line)
191 {
192         char boundary[256];
193
194         if (strcasestr(line, "text/") == NULL)
195                  message_type = TYPE_OTHER;
196         if (slurp_attr(line, "boundary=", boundary + 2)) {
197                 memcpy(boundary, "--", 2);
198                 if (content_top++ >= &content[MAX_BOUNDARIES]) {
199                         fprintf(stderr, "Too many boundaries to handle\n");
200                         exit(1);
201                 }
202                 content_top->boundary_len = strlen(boundary);
203                 content_top->boundary = xmalloc(content_top->boundary_len+1);
204                 strcpy(content_top->boundary, boundary);
205         }
206         if (slurp_attr(line, "charset=", charset)) {
207                 int i, c;
208                 for (i = 0; (c = charset[i]) != 0; i++)
209                         charset[i] = tolower(c);
210         }
211         return 0;
212 }
213
214 static int handle_content_transfer_encoding(char *line)
215 {
216         if (strcasestr(line, "base64"))
217                 transfer_encoding = TE_BASE64;
218         else if (strcasestr(line, "quoted-printable"))
219                 transfer_encoding = TE_QP;
220         else
221                 transfer_encoding = TE_DONTCARE;
222         return 0;
223 }
224
225 static int is_multipart_boundary(const char *line)
226 {
227         return (!memcmp(line, content_top->boundary, content_top->boundary_len));
228 }
229
/*
 * Strip trailing whitespace from "line" in place and return the
 * length of what remains.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace((unsigned char)end[-1]))
		*--end = 0;
	return (int)(end - line);
}
237
/*
 * Strip leading noise from a subject line and trailing whitespace
 * from what remains.
 *
 * Skipped at the front, repeatedly: blanks, tabs and colons; "re:"
 * or "Re:"; a '[' that has no matching ']'; and a "[...]" group
 * whose length up to ']' is at most twice the length of the text
 * from ']' onwards.  Returns a pointer into "subject".
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		char first = *subject;

		if (first == ' ' || first == '\t' || first == ':') {
			subject++;
			continue;
		}
		if ((first == 'r' || first == 'R') &&
		    !memcmp("e:", subject+1, 2)) {
			subject += 3;
			continue;
		}
		if (first == '[') {
			char *close = strchr(subject, ']');
			int len, remove;

			if (!close) {
				subject++;
				continue;
			}
			len = strlen(close);
			remove = close - subject;
			if (remove <= len * 2) {
				subject = close + 1;
				continue;
			}
		}
		break;
	}
	eatspace(subject);
	return subject;
}
272
/*
 * Collapse every run of whitespace in "buf" into a single ' ',
 * in place.
 *
 * Done in one pass with a read and a write cursor, instead of
 * shifting the whole tail of the string once per removed byte.
 */
static void cleanup_space(char *buf)
{
	char *dst = buf;
	int in_run = 0;

	for (; *buf; buf++) {
		if (isspace((unsigned char)*buf)) {
			/* only the first byte of a run produces output */
			if (!in_run)
				*dst++ = ' ';
			in_run = 1;
		} else {
			*dst++ = *buf;
			in_run = 0;
		}
	}
	*dst = 0;
}
289
290 static void decode_header(char *it, unsigned itsize);
291 static const char *header[MAX_HDR_PARSED] = {
292         "From","Subject","Date",
293 };
294
295 static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
296 {
297         int i;
298
299         /* search for the interesting parts */
300         for (i = 0; header[i]; i++) {
301                 int len = strlen(header[i]);
302                 if ((!hdr_data[i] || overwrite) &&
303                     !strncasecmp(line, header[i], len) &&
304                     line[len] == ':' && isspace(line[len + 1])) {
305                         /* Unwrap inline B and Q encoding, and optionally
306                          * normalize the meta information to utf8.
307                          */
308                         decode_header(line + len + 2, linesize - len - 2);
309                         hdr_data[i] = xmalloc(1000 * sizeof(char));
310                         if (! handle_header(line, hdr_data[i], len + 2)) {
311                                 return 1;
312                         }
313                 }
314         }
315
316         /* Content stuff */
317         if (!strncasecmp(line, "Content-Type", 12) &&
318                 line[12] == ':' && isspace(line[12 + 1])) {
319                 decode_header(line + 12 + 2, linesize - 12 - 2);
320                 if (! handle_content_type(line)) {
321                         return 1;
322                 }
323         }
324         if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
325                 line[25] == ':' && isspace(line[25 + 1])) {
326                 decode_header(line + 25 + 2, linesize - 25 - 2);
327                 if (! handle_content_transfer_encoding(line)) {
328                         return 1;
329                 }
330         }
331
332         /* for inbody stuff */
333         if (!memcmp(">From", line, 5) && isspace(line[5]))
334                 return 1;
335         if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
336                 for (i = 0; header[i]; i++) {
337                         if (!memcmp("Subject: ", header[i], 9)) {
338                                 if (! handle_header(line, hdr_data[i], 0)) {
339                                         return 1;
340                                 }
341                         }
342                 }
343         }
344
345         /* no match */
346         return 0;
347 }
348
/*
 * Return 1 when "line" looks like a header line, 0 otherwise.
 *
 * mbox "From " and ">From " lines count as headers.  Otherwise the
 * line must start with a non-empty field name followed by ':'.
 */
static int is_rfc2822_header(char *line)
{
	/*
	 * The section that defines the loosest possible
	 * field name is "3.6.8 Optional fields".
	 *
	 * optional-field = field-name ":" unstructured CRLF
	 * field-name = 1*ftext
	 * ftext = %d33-57 / %59-126
	 */
	int ch;
	char *cp = line;

	/* Count mbox From headers as headers */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/*
			 * cp is already one past the colon; the field
			 * name is empty when the colon is the first byte.
			 */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
376
/*
 * Read one logical header line, with its continuation lines, from
 * "in" into "line".
 *
 * "sz" is the size of the "line" buffer in bytes; it has to be long
 * enough for one physical real-world e-mail line.
 *
 * Returns 1 when a header line was read.  Returns 0 at end of input,
 * or when the line is empty or not a header; in the latter case the
 * line is left in the buffer with its trailing whitespace replaced
 * by a single newline.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char continuation[1000];
	/* one byte is kept in reserve for re-adding a terminator */
	int limit = sz - 1;
	int len;

	/* Get the first part of the line. */
	if (!fgets(line, limit, in))
		return 0;

	/* An empty line or a non-header ends the header block. */
	len = eatspace(line);
	if (len == 0 || !is_rfc2822_header(line)) {
		/* Re-add the newline */
		line[len] = '\n';
		line[len + 1] = '\0';
		return 0;
	}

	/* Append continuation lines (RFC 2822 header "folding"). */
	while (1) {
		int addlen;
		int peek = fgetc(in);

		ungetc(peek, in);
		if (peek != ' ' && peek != '\t')
			break;
		if (!fgets(continuation, sizeof(continuation), in))
			break;
		addlen = eatspace(continuation);
		if (len >= limit - 1)
			continue;	/* buffer full: read and drop */
		if (addlen >= limit - len)
			addlen = limit - len - 1;
		memcpy(line + len, continuation, addlen);
		/* the leading blank of the continuation becomes '\n' */
		line[len] = '\n';
		len += addlen;
	}
	line[len] = 0;

	return 1;
}
434
/*
 * Decode quoted-printable text.
 *
 * in      - start of the encoded text
 * ot      - output buffer; may be the same as "in"
 * otsize  - size of the output buffer in bytes
 * ep      - one past the last byte of the encoded text
 * rfc2047 - non-zero to turn '_' into a space (RFC 2047 "Q")
 *
 * Returns 0 with a NUL-terminated result in "ot", or -1 when the
 * output buffer filled up (the output is still NUL-terminated).
 * A '=' followed by a newline, or one that does not have two more
 * bytes before "ep", ends the decoding.
 */
static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
	char *otend = ot + otsize;
	int c;
	while ((c = *in++) != 0 && (in <= ep)) {
		if (ot == otend) {
			*--ot = '\0';
			return -1;
		}
		if (c == '=') {
			int d, e;

			/* never read the escape digits from beyond ep */
			if (in >= ep)
				break;
			d = *in++;
			if (d == '\n' || !d)
				break; /* drop trailing newline */
			if (in >= ep)
				break; /* truncated escape */
			e = *in++;
			*ot++ = ((hexval(d) << 4) | hexval(e));
			continue;
		}
		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;
		*ot++ = c;
	}
	*ot = 0;
	return 0;
}
458
/*
 * Decode base64 text.
 *
 * in     - start of the encoded text
 * ot     - output buffer; may be the same as "in"
 * otsize - size of the output buffer in bytes
 * ep     - one past the last byte of the encoded text
 *
 * Bytes outside the base64 alphabet are skipped.  '=' padding is
 * treated as a zero digit, so it contributes NUL bytes to the
 * output.  Returns 0 with a NUL-terminated result in "ot", or -1
 * when the output buffer filled up.
 */
static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
	char *limit = ot + otsize;
	int phase = 0;	/* position inside the current group of four */
	int carry = 0;	/* bits left over for the next output byte */
	int ch;

	for (ch = *in++; ch != 0 && in <= ep; ch = *in++) {
		int sextet;

		if (ot == limit) {
			*--ot = '\0';
			return -1;
		}

		if ('A' <= ch && ch <= 'Z')
			sextet = ch - 'A';
		else if ('a' <= ch && ch <= 'z')
			sextet = ch - 'a' + 26;
		else if ('0' <= ch && ch <= '9')
			sextet = ch - '0' + 52;
		else if (ch == '+')
			sextet = 62;
		else if (ch == '/')
			sextet = 63;
		else if (ch == '=')
			sextet = 0;	/* padding: the data is trusted */
		else
			continue;	/* garbage */

		if (phase == 0) {
			carry = sextet << 2;
			phase = 1;
		} else if (phase == 1) {
			*ot++ = (carry | (sextet >> 4));
			carry = (sextet & 15) << 4;
			phase = 2;
		} else if (phase == 2) {
			*ot++ = (carry | (sextet >> 2));
			carry = (sextet & 3) << 6;
			phase = 3;
		} else {
			*ot++ = (carry | sextet);
			carry = 0;
			phase = 0;
		}
	}
	*ot = 0;
	return 0;
}
510
/*
 * Guess the charset of "line" when none is known.
 *
 * When the target charset is UTF-8 and the text already passes the
 * UTF-8 check (US-ASCII does), NULL is returned to say that nothing
 * needs converting.  Everything else is assumed to be Latin1, for
 * historical reasons.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line))
		return NULL;
	return "latin1";
}
530
531 static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
532 {
533         char *out;
534
535         if (!charset || !*charset) {
536                 charset = guess_charset(line, metainfo_charset);
537                 if (!charset)
538                         return;
539         }
540
541         if (!strcmp(metainfo_charset, charset))
542                 return;
543         out = reencode_string(line, metainfo_charset, charset);
544         if (!out)
545                 die("cannot convert from %s to %s\n",
546                     charset, metainfo_charset);
547         strlcpy(line, out, linesize);
548         free(out);
549 }
550
551 static int decode_header_bq(char *it, unsigned itsize)
552 {
553         char *in, *out, *ep, *cp, *sp;
554         char outbuf[1000];
555         int rfc2047 = 0;
556
557         in = it;
558         out = outbuf;
559         while ((ep = strstr(in, "=?")) != NULL) {
560                 int sz, encoding;
561                 char charset_q[256], piecebuf[256];
562                 rfc2047 = 1;
563
564                 if (in != ep) {
565                         sz = ep - in;
566                         memcpy(out, in, sz);
567                         out += sz;
568                         in += sz;
569                 }
570                 /* E.g.
571                  * ep : "=?iso-2022-jp?B?GyR...?= foo"
572                  * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
573                  */
574                 ep += 2;
575                 cp = strchr(ep, '?');
576                 if (!cp)
577                         return rfc2047; /* no munging */
578                 for (sp = ep; sp < cp; sp++)
579                         charset_q[sp - ep] = tolower(*sp);
580                 charset_q[cp - ep] = 0;
581                 encoding = cp[1];
582                 if (!encoding || cp[2] != '?')
583                         return rfc2047; /* no munging */
584                 ep = strstr(cp + 3, "?=");
585                 if (!ep)
586                         return rfc2047; /* no munging */
587                 switch (tolower(encoding)) {
588                 default:
589                         return rfc2047; /* no munging */
590                 case 'b':
591                         sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
592                         break;
593                 case 'q':
594                         sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
595                         break;
596                 }
597                 if (sz < 0)
598                         return rfc2047;
599                 if (metainfo_charset)
600                         convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
601
602                 sz = strlen(piecebuf);
603                 if (outbuf + sizeof(outbuf) <= out + sz)
604                         return rfc2047; /* no munging */
605                 strcpy(out, piecebuf);
606                 out += sz;
607                 in = ep + 2;
608         }
609         strcpy(out, in);
610         strlcpy(it, outbuf, itsize);
611         return rfc2047;
612 }
613
614 static void decode_header(char *it, unsigned itsize)
615 {
616
617         if (decode_header_bq(it, itsize))
618                 return;
619         /* otherwise "it" is a straight copy of the input.
620          * This can be binary guck but there is no charset specified.
621          */
622         if (metainfo_charset)
623                 convert_to_utf8(it, itsize, "");
624 }
625
626 static void decode_transfer_encoding(char *line, unsigned linesize)
627 {
628         char *ep;
629
630         switch (transfer_encoding) {
631         case TE_QP:
632                 ep = line + strlen(line);
633                 decode_q_segment(line, line, linesize, ep, 0);
634                 break;
635         case TE_BASE64:
636                 ep = line + strlen(line);
637                 decode_b_segment(line, line, linesize, ep);
638                 break;
639         case TE_DONTCARE:
640                 break;
641         }
642 }
643
644 static int handle_filter(char *line, unsigned linesize);
645
646 static int find_boundary(void)
647 {
648         while(fgets(line, sizeof(line), fin) != NULL) {
649                 if (is_multipart_boundary(line))
650                         return 1;
651         }
652         return 0;
653 }
654
655 static int handle_boundary(void)
656 {
657         char newline[]="\n";
658 again:
659         if (!memcmp(line+content_top->boundary_len, "--", 2)) {
660                 /* we hit an end boundary */
661                 /* pop the current boundary off the stack */
662                 free(content_top->boundary);
663
664                 /* technically won't happen as is_multipart_boundary()
665                    will fail first.  But just in case..
666                  */
667                 if (content_top-- < content) {
668                         fprintf(stderr, "Detected mismatched boundaries, "
669                                         "can't recover\n");
670                         exit(1);
671                 }
672                 handle_filter(newline, sizeof(newline));
673
674                 /* skip to the next boundary */
675                 if (!find_boundary())
676                         return 0;
677                 goto again;
678         }
679
680         /* set some defaults */
681         transfer_encoding = TE_DONTCARE;
682         charset[0] = 0;
683         message_type = TYPE_TEXT;
684
685         /* slurp in this section's info */
686         while (read_one_header_line(line, sizeof(line), fin))
687                 check_header(line, sizeof(line), p_hdr_data, 0);
688
689         /* eat the blank line after section info */
690         return (fgets(line, sizeof(line), fin) != NULL);
691 }
692
/*
 * Return 1 when "line" marks the end of the commit message and the
 * start of the patch, 0 otherwise.
 *
 * Recognised are a "diff -" header, a CVS "Index: " line,
 * "--- <filename>" (a patch without headers), and "---" followed by
 * nothing but whitespace up to the newline (a manual separator).
 */
static inline int patchbreak(const char *line)
{
	const char *p;

	if (!strncmp(line, "diff -", 6) || !strncmp(line, "Index: ", 7))
		return 1;

	if (strncmp(line, "---", 3))
		return 0;
	p = line + 3;

	/* space followed by a filename? */
	if (p[0] == ' ' && !isspace((unsigned char)p[1]))
		return 1;

	/* Just whitespace? */
	while (isspace((unsigned char)*p)) {
		if (*p == '\n')
			return 1;
		p++;
	}
	return 0;
}
724
725
726 static int handle_commit_msg(char *line, unsigned linesize)
727 {
728         static int still_looking = 1;
729         char *endline = line + linesize;
730
731         if (!cmitmsg)
732                 return 0;
733
734         if (still_looking) {
735                 char *cp = line;
736                 if (isspace(*line)) {
737                         for (cp = line + 1; *cp; cp++) {
738                                 if (!isspace(*cp))
739                                         break;
740                         }
741                         if (!*cp)
742                                 return 0;
743                 }
744                 if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
745                         return 0;
746         }
747
748         /* normalize the log message to UTF-8. */
749         if (metainfo_charset)
750                 convert_to_utf8(line, endline - line, charset);
751
752         if (patchbreak(line)) {
753                 fclose(cmitmsg);
754                 cmitmsg = NULL;
755                 return 1;
756         }
757
758         fputs(line, cmitmsg);
759         return 0;
760 }
761
762 static int handle_patch(char *line)
763 {
764         fputs(line, patchfile);
765         patch_lines++;
766         return 0;
767 }
768
/*
 * Route one body line to the stage currently consuming input.
 *
 * A static counter remembers the stage: 0 for the commit message,
 * 1 for the patch.  When a stage reports that it hit its filter
 * point, the same line is passed on to the next stage.  Returns 0
 * while some stage accepted the line, 1 once no stage is left.
 */
static int handle_filter(char *line, unsigned linesize)
{
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line, linesize))
			return 0;
		filter++;
	}
	if (filter == 1) {
		if (!handle_patch(line))
			return 0;
		filter++;
	}
	return 1;
}
791
792 static void handle_body(void)
793 {
794         int rc = 0;
795         static char newline[2000];
796         static char *np = newline;
797
798         /* Skip up to the first boundary */
799         if (content_top->boundary) {
800                 if (!find_boundary())
801                         return;
802         }
803
804         do {
805                 /* process any boundary lines */
806                 if (content_top->boundary && is_multipart_boundary(line)) {
807                         /* flush any leftover */
808                         if ((transfer_encoding == TE_BASE64)  &&
809                             (np != newline)) {
810                                 handle_filter(newline, sizeof(newline));
811                         }
812                         if (!handle_boundary())
813                                 return;
814                 }
815
816                 /* Unwrap transfer encoding */
817                 decode_transfer_encoding(line, sizeof(line));
818
819                 switch (transfer_encoding) {
820                 case TE_BASE64:
821                 case TE_QP:
822                 {
823                         char *op = line;
824
825                         /* binary data most likely doesn't have newlines */
826                         if (message_type != TYPE_TEXT) {
827                                 rc = handle_filter(line, sizeof(newline));
828                                 break;
829                         }
830
831                         /* this is a decoded line that may contain
832                          * multiple new lines.  Pass only one chunk
833                          * at a time to handle_filter()
834                          */
835
836                         do {
837                                 while (*op != '\n' && *op != 0)
838                                         *np++ = *op++;
839                                 *np = *op;
840                                 if (*np != 0) {
841                                         /* should be sitting on a new line */
842                                         *(++np) = 0;
843                                         op++;
844                                         rc = handle_filter(newline, sizeof(newline));
845                                         np = newline;
846                                 }
847                         } while (*op != 0);
848                         /* the partial chunk is saved in newline and
849                          * will be appended by the next iteration of fgets
850                          */
851                         break;
852                 }
853                 default:
854                         rc = handle_filter(line, sizeof(newline));
855                 }
856                 if (rc)
857                         /* nothing left to filter */
858                         break;
859         } while (fgets(line, sizeof(line), fin));
860
861         return;
862 }
863
/*
 * Print "data" to "fout" as one "<hdr>: <text>" line for each
 * newline-separated piece of it.  Text without a newline yields
 * exactly one line.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	char *ep;

	while ((ep = strchr(data, '\n')) != NULL) {
		int len = ep - data;

		fprintf(fout, "%s: %.*s\n", hdr, len, data);
		data = ep + 1;
	}
	fprintf(fout, "%s: %.*s\n", hdr, (int)strlen(data), data);
}
879
880 static void handle_info(void)
881 {
882         char *sub;
883         char *hdr;
884         int i;
885
886         for (i = 0; header[i]; i++) {
887
888                 /* only print inbody headers if we output a patch file */
889                 if (patch_lines && s_hdr_data[i])
890                         hdr = s_hdr_data[i];
891                 else if (p_hdr_data[i])
892                         hdr = p_hdr_data[i];
893                 else
894                         continue;
895
896                 if (!memcmp(header[i], "Subject", 7)) {
897                         if (keep_subject)
898                                 sub = hdr;
899                         else {
900                                 sub = cleanup_subject(hdr);
901                                 cleanup_space(sub);
902                         }
903                         output_header_lines(fout, "Subject", sub);
904                 } else if (!memcmp(header[i], "From", 4)) {
905                         handle_from(hdr);
906                         fprintf(fout, "Author: %s\n", name);
907                         fprintf(fout, "Email: %s\n", email);
908                 } else {
909                         cleanup_space(hdr);
910                         fprintf(fout, "%s: %s\n", header[i], hdr);
911                 }
912         }
913         fprintf(fout, "\n");
914 }
915
916 static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
917                     const char *msg, const char *patch)
918 {
919         int peek;
920         keep_subject = ks;
921         metainfo_charset = encoding;
922         fin = in;
923         fout = out;
924
925         cmitmsg = fopen(msg, "w");
926         if (!cmitmsg) {
927                 perror(msg);
928                 return -1;
929         }
930         patchfile = fopen(patch, "w");
931         if (!patchfile) {
932                 perror(patch);
933                 fclose(cmitmsg);
934                 return -1;
935         }
936
937         p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
938         s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
939
940         do {
941                 peek = fgetc(in);
942         } while (isspace(peek));
943         ungetc(peek, in);
944
945         /* process the email header */
946         while (read_one_header_line(line, sizeof(line), fin))
947                 check_header(line, sizeof(line), p_hdr_data, 1);
948
949         handle_body();
950         handle_info();
951
952         return 0;
953 }
954
/* Usage text; lists every option that cmd_mailinfo() accepts. */
static const char mailinfo_usage[] =
	"git-mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info";
957
958 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
959 {
960         const char *def_charset;
961
962         /* NEEDSWORK: might want to do the optional .git/ directory
963          * discovery
964          */
965         git_config(git_default_config);
966
967         def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
968         metainfo_charset = def_charset;
969
970         while (1 < argc && argv[1][0] == '-') {
971                 if (!strcmp(argv[1], "-k"))
972                         keep_subject = 1;
973                 else if (!strcmp(argv[1], "-u"))
974                         metainfo_charset = def_charset;
975                 else if (!strcmp(argv[1], "-n"))
976                         metainfo_charset = NULL;
977                 else if (!prefixcmp(argv[1], "--encoding="))
978                         metainfo_charset = argv[1] + 11;
979                 else
980                         usage(mailinfo_usage);
981                 argc--; argv++;
982         }
983
984         if (argc != 3)
985                 usage(mailinfo_usage);
986
987         return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]);
988 }