Merge branch 'maint'
[git] / builtin-mailinfo.c
1 /*
2  * Another stupid program, this one parsing the headers of an
3  * email to figure out authorship and subject
4  */
5 #include "cache.h"
6 #include "builtin.h"
7 #include "utf8.h"
8
/* Streams used by mailinfo(): the commit message and patch output
 * files, the mail being read and the info summary being written.
 */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Set from the command line (-k and -u / -n / --encoding=). */
static int keep_subject;
static const char *metainfo_charset;

/* Scratch buffers: the current input line and the parsed author. */
static char line[1000];
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part being read. */
static enum {
	TE_DONTCARE,
	TE_QP,
	TE_BASE64,
} transfer_encoding;

/* Whether the Content-Type of the part being read mentions "text/". */
static enum {
	TYPE_TEXT,
	TYPE_OTHER,
} message_type;

/* charset= attribute of the current part, lowercased. */
static char charset[256];
/* Number of lines written to the patch file so far. */
static int patch_lines;
/* Header values taken from the mail/part headers (p_) and from the
 * in-body header lines at the top of the message (s_).
 */
static char **p_hdr_data;
static char **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
30
/*
 * Pick the string to use as the author name.
 *
 * "name" is accepted when it is between 3 and 60 bytes long and
 * contains none of '@', '<' or '>'; otherwise "email" is returned
 * as the fallback.  The returned pointer is one of the arguments.
 */
static char *sanity_check(char *name, char *email)
{
	int len = strlen(name);
	int plausible = (3 <= len && len <= 60) && !strpbrk(name, "@<>");

	return plausible ? name : email;
}
40
41 static int bogus_from(char *line)
42 {
43         /* John Doe <johndoe> */
44         char *bra, *ket, *dst, *cp;
45
46         /* This is fallback, so do not bother if we already have an
47          * e-mail address.
48          */
49         if (*email)
50                 return 0;
51
52         bra = strchr(line, '<');
53         if (!bra)
54                 return 0;
55         ket = strchr(bra, '>');
56         if (!ket)
57                 return 0;
58
59         for (dst = email, cp = bra+1; cp < ket; )
60                 *dst++ = *cp++;
61         *dst = 0;
62         for (cp = line; isspace(*cp); cp++)
63                 ;
64         for (bra--; isspace(*bra); bra--)
65                 *bra = 0;
66         cp = sanity_check(cp, email);
67         strcpy(name, cp);
68         return 1;
69 }
70
71 static int handle_from(char *in_line)
72 {
73         char line[1000];
74         char *at;
75         char *dst;
76
77         strcpy(line, in_line);
78         at = strchr(line, '@');
79         if (!at)
80                 return bogus_from(line);
81
82         /*
83          * If we already have one email, don't take any confusing lines
84          */
85         if (*email && strchr(at+1, '@'))
86                 return 0;
87
88         /* Pick up the string around '@', possibly delimited with <>
89          * pair; that is the email part.  White them out while copying.
90          */
91         while (at > line) {
92                 char c = at[-1];
93                 if (isspace(c))
94                         break;
95                 if (c == '<') {
96                         at[-1] = ' ';
97                         break;
98                 }
99                 at--;
100         }
101         dst = email;
102         for (;;) {
103                 unsigned char c = *at;
104                 if (!c || c == '>' || isspace(c)) {
105                         if (c == '>')
106                                 *at = ' ';
107                         break;
108                 }
109                 *at++ = ' ';
110                 *dst++ = c;
111         }
112         *dst++ = 0;
113
114         /* The remainder is name.  It could be "John Doe <john.doe@xz>"
115          * or "john.doe@xz (John Doe)", but we have whited out the
116          * email part, so trim from both ends, possibly removing
117          * the () pair at the end.
118          */
119         at = line + strlen(line);
120         while (at > line) {
121                 unsigned char c = *--at;
122                 if (!isspace(c)) {
123                         at[(c == ')') ? 0 : 1] = 0;
124                         break;
125                 }
126         }
127
128         at = line;
129         for (;;) {
130                 unsigned char c = *at;
131                 if (!c || !isspace(c)) {
132                         if (c == '(')
133                                 at++;
134                         break;
135                 }
136                 at++;
137         }
138         at = sanity_check(at, email);
139         strcpy(name, at);
140         return 1;
141 }
142
/*
 * Copy the part of "line" starting at byte offset "ofs" into "data".
 * Returns 0 after copying, or 1 (and copies nothing) when either
 * pointer is NULL.  "data" must be large enough for the copy.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 * to have enough heuristics to grok MIME encoded patches often found
 * on our mailing lists.  For example, header and attribute names are
 * matched case insensitively, but only by simple prefix and substring
 * comparison rather than by real MIME parsing.
 */
158
/*
 * Extract the value that follows the first case-insensitive
 * occurrence of "name" (e.g. "charset=") in "line" into "attr".
 *
 * A value that starts with a double quote runs up to the next double
 * quote; otherwise it runs up to the next ';', space or tab.  Returns
 * 1 when "name" was found and 0 (with "attr" set to the empty string)
 * when it was not.  "attr" must be large enough for the value.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *stop_at = "; \t";
	size_t n;

	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}

	value += strlen(name);
	if (*value == '"') {
		stop_at = "\"";
		value++;
	}
	n = strcspn(value, stop_at);
	memcpy(attr, value, n);
	attr[n] = '\0';
	return 1;
}
180
181 struct content_type {
182         char *boundary;
183         int boundary_len;
184 };
185
186 static struct content_type content[MAX_BOUNDARIES];
187
188 static struct content_type *content_top = content;
189
190 static int handle_content_type(char *line)
191 {
192         char boundary[256];
193
194         if (strcasestr(line, "text/") == NULL)
195                  message_type = TYPE_OTHER;
196         if (slurp_attr(line, "boundary=", boundary + 2)) {
197                 memcpy(boundary, "--", 2);
198                 if (content_top++ >= &content[MAX_BOUNDARIES]) {
199                         fprintf(stderr, "Too many boundaries to handle\n");
200                         exit(1);
201                 }
202                 content_top->boundary_len = strlen(boundary);
203                 content_top->boundary = xmalloc(content_top->boundary_len+1);
204                 strcpy(content_top->boundary, boundary);
205         }
206         if (slurp_attr(line, "charset=", charset)) {
207                 int i, c;
208                 for (i = 0; (c = charset[i]) != 0; i++)
209                         charset[i] = tolower(c);
210         }
211         return 0;
212 }
213
214 static int handle_content_transfer_encoding(char *line)
215 {
216         if (strcasestr(line, "base64"))
217                 transfer_encoding = TE_BASE64;
218         else if (strcasestr(line, "quoted-printable"))
219                 transfer_encoding = TE_QP;
220         else
221                 transfer_encoding = TE_DONTCARE;
222         return 0;
223 }
224
225 static int is_multipart_boundary(const char *line)
226 {
227         return (!memcmp(line, content_top->boundary, content_top->boundary_len));
228 }
229
/*
 * Strip trailing whitespace from "line" in place and return the
 * length of what remains.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = 0;
	return end - line;
}
237
/*
 * Strip noise from the front of a subject line and trailing
 * whitespace from its end.
 *
 * Repeatedly skipped at the front: blanks, tabs and colons, "re:" /
 * "Re:" prefixes, a '[' that has no matching ']', and a "[...]" tag
 * as long as the tag is at most twice as long as the text from its
 * ']' onwards.  Returns a pointer into "subject", which is modified
 * in place.
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		char first = *subject;

		if (first == ' ' || first == '\t' || first == ':') {
			subject++;
			continue;
		}
		if ((first == 'r' || first == 'R') &&
		    !memcmp("e:", subject+1, 2)) {
			subject += 3;
			continue;
		}
		if (first == '[') {
			char *close = strchr(subject, ']');
			int rest, tag;

			if (!close) {
				subject++;
				continue;
			}
			rest = strlen(close);
			tag = close - subject;
			if (tag <= rest * 2) {
				subject = close + 1;
				continue;
			}
		}
		break;
	}
	eatspace(subject);
	return subject;
}
272
/*
 * Collapse every run of whitespace characters in "buf" into a single
 * space, in place.  Leading and trailing runs are collapsed too, not
 * removed.
 */
static void cleanup_space(char *buf)
{
	char *src = buf, *dst = buf;
	unsigned char c;

	while ((c = *src) != 0) {
		src++;
		if (!isspace(c)) {
			*dst++ = c;
			continue;
		}
		*dst++ = ' ';
		/* swallow the rest of the run */
		while ((c = *src) != 0 && isspace(c))
			src++;
	}
	*dst = 0;
}
289
290 static void decode_header(char *it);
291 static char *header[MAX_HDR_PARSED] = {
292         "From","Subject","Date",
293 };
294
295 static int check_header(char *line, char **hdr_data, int overwrite)
296 {
297         int i;
298
299         /* search for the interesting parts */
300         for (i = 0; header[i]; i++) {
301                 int len = strlen(header[i]);
302                 if ((!hdr_data[i] || overwrite) &&
303                     !strncasecmp(line, header[i], len) &&
304                     line[len] == ':' && isspace(line[len + 1])) {
305                         /* Unwrap inline B and Q encoding, and optionally
306                          * normalize the meta information to utf8.
307                          */
308                         decode_header(line + len + 2);
309                         hdr_data[i] = xmalloc(1000 * sizeof(char));
310                         if (! handle_header(line, hdr_data[i], len + 2)) {
311                                 return 1;
312                         }
313                 }
314         }
315
316         /* Content stuff */
317         if (!strncasecmp(line, "Content-Type", 12) &&
318                 line[12] == ':' && isspace(line[12 + 1])) {
319                 decode_header(line + 12 + 2);
320                 if (! handle_content_type(line)) {
321                         return 1;
322                 }
323         }
324         if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
325                 line[25] == ':' && isspace(line[25 + 1])) {
326                 decode_header(line + 25 + 2);
327                 if (! handle_content_transfer_encoding(line)) {
328                         return 1;
329                 }
330         }
331
332         /* for inbody stuff */
333         if (!memcmp(">From", line, 5) && isspace(line[5]))
334                 return 1;
335         if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) {
336                 for (i = 0; header[i]; i++) {
337                         if (!memcmp("Subject: ", header[i], 9)) {
338                                 if (! handle_header(line, hdr_data[i], 0)) {
339                                         return 1;
340                                 }
341                         }
342                 }
343         }
344
345         /* no match */
346         return 0;
347 }
348
/*
 * Return 1 when "line" looks like an RFC 2822 header line (or an
 * mbox "From " / ">From " line), 0 otherwise.
 */
static int is_rfc2822_header(char *line)
{
	/*
	 * The section that defines the loosest possible
	 * field name is "3.6.8 Optional fields".
	 *
	 * optional-field = field-name ":" unstructured CRLF
	 * field-name = 1*ftext
	 * ftext = %d33-57 / %59-126
	 */
	int ch;
	char *cp = line;

	/* Count mbox From headers as headers; strncmp() stops at the
	 * end of a short line instead of reading beyond it.
	 */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/* cp is already one past the ':'; the field
			 * name must have at least one character
			 */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
376
/*
 * Read one logical header line from "in" into "line", a buffer of
 * "sz" bytes, joining folded continuation lines onto it.
 *
 * Returns 1 with the unfolded header (trailing whitespace removed)
 * in "line".  Returns 0 at end of input, or when the line read is
 * empty or not a header; in the latter case the line is left in
 * "line" with a single newline put back at its end.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char continuation[1000];
	/* One byte of the buffer is held in reserve, so everything
	 * below works against sz - 1.
	 */
	const int limit = sz - 1;
	int len;

	/* Get the first part of the line. */
	if (!fgets(line, limit, in))
		return 0;

	/* An empty line or something that is not a header ends the
	 * header block: hand the line back with its newline restored.
	 */
	len = eatspace(line);
	if (len == 0 || !is_rfc2822_header(line)) {
		line[len] = '\n';
		line[len + 1] = '\0';
		return 0;
	}

	/* Header "folding": as long as the next physical line starts
	 * with a blank or a tab it continues this header.
	 */
	for (;;) {
		int next = fgetc(in);
		int extra;

		ungetc(next, in);
		if (next != ' ' && next != '\t')
			break;
		if (!fgets(continuation, sizeof(continuation), in))
			break;
		extra = eatspace(continuation);

		/* no room left: the continuation is read and dropped */
		if (len >= limit - 1)
			continue;
		if (extra >= limit - len)
			extra = limit - len - 1;
		memcpy(line + len, continuation, extra);
		/* the leading blank of the continuation becomes the
		 * line separator
		 */
		line[len] = '\n';
		len += extra;
	}
	line[len] = 0;

	return 1;
}
434
/*
 * Decode quoted-printable text starting at "in" into "ot"; "ot" may
 * be the same buffer as "in".  Input is consumed up to "ep" or the
 * first NUL.
 *
 * "=XX" becomes the byte with hex value XX; "=" followed by a
 * newline or by the end of the string stops decoding.  When rfc2047
 * is non-zero, '_' decodes to a space.  The output is
 * NUL-terminated.  Always returns 0.
 */
static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
{
	for (;;) {
		int ch = *in++;

		if (!ch || in > ep)
			break;
		if (ch == '=') {
			int hi = *in++;
			int lo;

			if (hi == '\n' || !hi)
				break; /* drop trailing newline */
			lo = *in++;
			*ot++ = ((hexval(hi) << 4) | hexval(lo));
			continue;
		}
		/* rfc2047 4.2 (2) */
		*ot++ = (rfc2047 && ch == '_') ? 0x20 : ch;
	}
	*ot = 0;
	return 0;
}
453
/*
 * Map one base64 character to its 6-bit value.  The padding
 * character '=' maps to 0; anything outside the alphabet gives -1.
 */
static int b64_digit(int ch)
{
	if ('A' <= ch && ch <= 'Z')
		return ch - 'A';
	if ('a' <= ch && ch <= 'z')
		return ch - 'a' + 26;
	if ('0' <= ch && ch <= '9')
		return ch - '0' + 52;
	if (ch == '+')
		return 62;
	if (ch == '/')
		return 63;
	if (ch == '=')
		return 0;
	return -1;
}

/*
 * Decode base64 text starting at "in" into "ot"; "ot" may be the
 * same buffer as "in".  Input is consumed up to "ep" or the first
 * NUL, and characters outside the base64 alphabet are skipped.
 *
 * Padding is decoded like zero bits, so it can leave NUL bytes at
 * the end of the output.  The output is NUL-terminated.  Always
 * returns 0.
 */
static int decode_b_segment(char *in, char *ot, char *ep)
{
	int phase = 0, carry = 0;

	for (;;) {
		int ch = *in++;
		int v;

		if (!ch || in > ep)
			break;
		v = b64_digit(ch);
		if (v < 0)
			continue; /* garbage */

		/* four 6-bit digits make three output bytes */
		if (phase == 0) {
			carry = (v << 2);
		} else if (phase == 1) {
			*ot++ = (carry | (v >> 4));
			carry = (v & 15) << 4;
		} else if (phase == 2) {
			*ot++ = (carry | (v >> 2));
			carry = (v & 3) << 6;
		} else {
			*ot++ = (carry | v);
			carry = 0;
		}
		phase = (phase + 1) & 3;
	}
	*ot = 0;
	return 0;
}
500
501 /*
502  * When there is no known charset, guess.
503  *
504  * Right now we assume that if the target is UTF-8 (the default),
505  * and it already looks like UTF-8 (which includes US-ASCII as its
506  * subset, of course) then that is what it is and there is nothing
507  * to do.
508  *
509  * Otherwise, we default to assuming it is Latin1 for historical
510  * reasons.
511  */
512 static const char *guess_charset(const char *line, const char *target_charset)
513 {
514         if (is_encoding_utf8(target_charset)) {
515                 if (is_utf8(line))
516                         return NULL;
517         }
518         return "latin1";
519 }
520
521 static void convert_to_utf8(char *line, const char *charset)
522 {
523         char *out;
524
525         if (!charset || !*charset) {
526                 charset = guess_charset(line, metainfo_charset);
527                 if (!charset)
528                         return;
529         }
530
531         if (!strcmp(metainfo_charset, charset))
532                 return;
533         out = reencode_string(line, metainfo_charset, charset);
534         if (!out)
535                 die("cannot convert from %s to %s\n",
536                     charset, metainfo_charset);
537         strcpy(line, out);
538         free(out);
539 }
540
541 static int decode_header_bq(char *it)
542 {
543         char *in, *out, *ep, *cp, *sp;
544         char outbuf[1000];
545         int rfc2047 = 0;
546
547         in = it;
548         out = outbuf;
549         while ((ep = strstr(in, "=?")) != NULL) {
550                 int sz, encoding;
551                 char charset_q[256], piecebuf[256];
552                 rfc2047 = 1;
553
554                 if (in != ep) {
555                         sz = ep - in;
556                         memcpy(out, in, sz);
557                         out += sz;
558                         in += sz;
559                 }
560                 /* E.g.
561                  * ep : "=?iso-2022-jp?B?GyR...?= foo"
562                  * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
563                  */
564                 ep += 2;
565                 cp = strchr(ep, '?');
566                 if (!cp)
567                         return rfc2047; /* no munging */
568                 for (sp = ep; sp < cp; sp++)
569                         charset_q[sp - ep] = tolower(*sp);
570                 charset_q[cp - ep] = 0;
571                 encoding = cp[1];
572                 if (!encoding || cp[2] != '?')
573                         return rfc2047; /* no munging */
574                 ep = strstr(cp + 3, "?=");
575                 if (!ep)
576                         return rfc2047; /* no munging */
577                 switch (tolower(encoding)) {
578                 default:
579                         return rfc2047; /* no munging */
580                 case 'b':
581                         sz = decode_b_segment(cp + 3, piecebuf, ep);
582                         break;
583                 case 'q':
584                         sz = decode_q_segment(cp + 3, piecebuf, ep, 1);
585                         break;
586                 }
587                 if (sz < 0)
588                         return rfc2047;
589                 if (metainfo_charset)
590                         convert_to_utf8(piecebuf, charset_q);
591                 strcpy(out, piecebuf);
592                 out += strlen(out);
593                 in = ep + 2;
594         }
595         strcpy(out, in);
596         strcpy(it, outbuf);
597         return rfc2047;
598 }
599
600 static void decode_header(char *it)
601 {
602
603         if (decode_header_bq(it))
604                 return;
605         /* otherwise "it" is a straight copy of the input.
606          * This can be binary guck but there is no charset specified.
607          */
608         if (metainfo_charset)
609                 convert_to_utf8(it, "");
610 }
611
612 static void decode_transfer_encoding(char *line)
613 {
614         char *ep;
615
616         switch (transfer_encoding) {
617         case TE_QP:
618                 ep = line + strlen(line);
619                 decode_q_segment(line, line, ep, 0);
620                 break;
621         case TE_BASE64:
622                 ep = line + strlen(line);
623                 decode_b_segment(line, line, ep);
624                 break;
625         case TE_DONTCARE:
626                 break;
627         }
628 }
629
630 static int handle_filter(char *line);
631
632 static int find_boundary(void)
633 {
634         while(fgets(line, sizeof(line), fin) != NULL) {
635                 if (is_multipart_boundary(line))
636                         return 1;
637         }
638         return 0;
639 }
640
641 static int handle_boundary(void)
642 {
643         char newline[]="\n";
644 again:
645         if (!memcmp(line+content_top->boundary_len, "--", 2)) {
646                 /* we hit an end boundary */
647                 /* pop the current boundary off the stack */
648                 free(content_top->boundary);
649
650                 /* technically won't happen as is_multipart_boundary()
651                    will fail first.  But just in case..
652                  */
653                 if (content_top-- < content) {
654                         fprintf(stderr, "Detected mismatched boundaries, "
655                                         "can't recover\n");
656                         exit(1);
657                 }
658                 handle_filter(newline);
659
660                 /* skip to the next boundary */
661                 if (!find_boundary())
662                         return 0;
663                 goto again;
664         }
665
666         /* set some defaults */
667         transfer_encoding = TE_DONTCARE;
668         charset[0] = 0;
669         message_type = TYPE_TEXT;
670
671         /* slurp in this section's info */
672         while (read_one_header_line(line, sizeof(line), fin))
673                 check_header(line, p_hdr_data, 0);
674
675         /* eat the blank line after section info */
676         return (fgets(line, sizeof(line), fin) != NULL);
677 }
678
/*
 * Return 1 when "line" marks the end of the commit message and the
 * start of the patch, 0 otherwise.  Recognized are:
 *
 *   "diff -..."       a diff header
 *   "Index: ..."      a CVS index line
 *   "--- <filename>"  a patch without headers
 *   "---" followed by nothing but whitespace up to the newline,
 *                     the manual separator
 */
static inline int patchbreak(const char *line)
{
	const char *rest;

	if (!memcmp("diff -", line, 6) || !memcmp("Index: ", line, 7))
		return 1;
	if (memcmp("---", line, 3))
		return 0;

	rest = line + 3;
	/* space followed by a filename? */
	if (rest[0] == ' ' && !isspace(rest[1]))
		return 1;
	/* otherwise only whitespace may sit between "---" and newline */
	while (*rest != '\n' && isspace((unsigned char)*rest))
		rest++;
	return *rest == '\n';
}
710
711
712 static int handle_commit_msg(char *line)
713 {
714         static int still_looking = 1;
715
716         if (!cmitmsg)
717                 return 0;
718
719         if (still_looking) {
720                 char *cp = line;
721                 if (isspace(*line)) {
722                         for (cp = line + 1; *cp; cp++) {
723                                 if (!isspace(*cp))
724                                         break;
725                         }
726                         if (!*cp)
727                                 return 0;
728                 }
729                 if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0)
730                         return 0;
731         }
732
733         /* normalize the log message to UTF-8. */
734         if (metainfo_charset)
735                 convert_to_utf8(line, charset);
736
737         if (patchbreak(line)) {
738                 fclose(cmitmsg);
739                 cmitmsg = NULL;
740                 return 1;
741         }
742
743         fputs(line, cmitmsg);
744         return 0;
745 }
746
747 static int handle_patch(char *line)
748 {
749         fputs(line, patchfile);
750         patch_lines++;
751         return 0;
752 }
753
/*
 * Route one body line to the stage currently in effect: first the
 * commit message, then the patch.  A static variable remembers which
 * stage was reached.  When a stage reports (by returning non-zero)
 * that it is finished, the same line is handed on to the next stage.
 *
 * Returns 0 while a stage accepted the line and 1 once all stages
 * are finished.
 */
static int handle_filter(char *line)
{
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line))
			return 0;
		filter = 1;
	}
	if (filter == 1) {
		if (!handle_patch(line))
			return 0;
		filter = 2;
	}
	return 1;
}
776
777 static void handle_body(void)
778 {
779         int rc = 0;
780         static char newline[2000];
781         static char *np = newline;
782
783         /* Skip up to the first boundary */
784         if (content_top->boundary) {
785                 if (!find_boundary())
786                         return;
787         }
788
789         do {
790                 /* process any boundary lines */
791                 if (content_top->boundary && is_multipart_boundary(line)) {
792                         /* flush any leftover */
793                         if ((transfer_encoding == TE_BASE64)  &&
794                             (np != newline)) {
795                                 handle_filter(newline);
796                         }
797                         if (!handle_boundary())
798                                 return;
799                 }
800
801                 /* Unwrap transfer encoding */
802                 decode_transfer_encoding(line);
803
804                 switch (transfer_encoding) {
805                 case TE_BASE64:
806                 {
807                         char *op = line;
808
809                         /* binary data most likely doesn't have newlines */
810                         if (message_type != TYPE_TEXT) {
811                                 rc = handle_filter(line);
812                                 break;
813                         }
814
815                         /* this is a decoded line that may contain
816                          * multiple new lines.  Pass only one chunk
817                          * at a time to handle_filter()
818                          */
819
820                         do {
821                                 while (*op != '\n' && *op != 0)
822                                         *np++ = *op++;
823                                 *np = *op;
824                                 if (*np != 0) {
825                                         /* should be sitting on a new line */
826                                         *(++np) = 0;
827                                         op++;
828                                         rc = handle_filter(newline);
829                                         np = newline;
830                                 }
831                         } while (*op != 0);
832                         /* the partial chunk is saved in newline and
833                          * will be appended by the next iteration of fgets
834                          */
835                         break;
836                 }
837                 default:
838                         rc = handle_filter(line);
839                 }
840                 if (rc)
841                         /* nothing left to filter */
842                         break;
843         } while (fgets(line, sizeof(line), fin));
844
845         return;
846 }
847
/*
 * Print "data" to "fout" as header "hdr".  Every newline-separated
 * piece of "data" is written as its own "<hdr>: <piece>" line; data
 * without any newline gives exactly one line.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	for (;;) {
		/* length of the piece up to the next newline or the end */
		size_t piece = strcspn(data, "\n");

		fprintf(fout, "%s: %.*s\n", hdr, (int)piece, data);
		if (data[piece] == 0)
			break;
		data += piece + 1;
	}
}
863
864 static void handle_info(void)
865 {
866         char *sub;
867         char *hdr;
868         int i;
869
870         for (i = 0; header[i]; i++) {
871
872                 /* only print inbody headers if we output a patch file */
873                 if (patch_lines && s_hdr_data[i])
874                         hdr = s_hdr_data[i];
875                 else if (p_hdr_data[i])
876                         hdr = p_hdr_data[i];
877                 else
878                         continue;
879
880                 if (!memcmp(header[i], "Subject", 7)) {
881                         if (keep_subject)
882                                 sub = hdr;
883                         else {
884                                 sub = cleanup_subject(hdr);
885                                 cleanup_space(sub);
886                         }
887                         output_header_lines(fout, "Subject", sub);
888                 } else if (!memcmp(header[i], "From", 4)) {
889                         handle_from(hdr);
890                         fprintf(fout, "Author: %s\n", name);
891                         fprintf(fout, "Email: %s\n", email);
892                 } else {
893                         cleanup_space(hdr);
894                         fprintf(fout, "%s: %s\n", header[i], hdr);
895                 }
896         }
897         fprintf(fout, "\n");
898 }
899
900 static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
901                     const char *msg, const char *patch)
902 {
903         keep_subject = ks;
904         metainfo_charset = encoding;
905         fin = in;
906         fout = out;
907
908         cmitmsg = fopen(msg, "w");
909         if (!cmitmsg) {
910                 perror(msg);
911                 return -1;
912         }
913         patchfile = fopen(patch, "w");
914         if (!patchfile) {
915                 perror(patch);
916                 fclose(cmitmsg);
917                 return -1;
918         }
919
920         p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
921         s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
922
923         /* process the email header */
924         while (read_one_header_line(line, sizeof(line), fin))
925                 check_header(line, p_hdr_data, 1);
926
927         handle_body();
928         handle_info();
929
930         return 0;
931 }
932
/* Usage text; lists every option the argument parser accepts. */
static const char mailinfo_usage[] =
	"git-mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info";
935
936 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
937 {
938         const char *def_charset;
939
940         /* NEEDSWORK: might want to do the optional .git/ directory
941          * discovery
942          */
943         git_config(git_default_config);
944
945         def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
946         metainfo_charset = def_charset;
947
948         while (1 < argc && argv[1][0] == '-') {
949                 if (!strcmp(argv[1], "-k"))
950                         keep_subject = 1;
951                 else if (!strcmp(argv[1], "-u"))
952                         metainfo_charset = def_charset;
953                 else if (!strcmp(argv[1], "-n"))
954                         metainfo_charset = NULL;
955                 else if (!prefixcmp(argv[1], "--encoding="))
956                         metainfo_charset = argv[1] + 11;
957                 else
958                         usage(mailinfo_usage);
959                 argc--; argv++;
960         }
961
962         if (argc != 3)
963                 usage(mailinfo_usage);
964
965         return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]);
966 }