Merge branch 'jk/shortlog-group-by-trailer'
[git] / builtin / shortlog.c
1 #include "builtin.h"
2 #include "cache.h"
3 #include "config.h"
4 #include "commit.h"
5 #include "diff.h"
6 #include "string-list.h"
7 #include "revision.h"
8 #include "utf8.h"
9 #include "mailmap.h"
10 #include "shortlog.h"
11 #include "parse-options.h"
12 #include "trailer.h"
13
/*
 * NULL-terminated list of usage strings. It is handed to the option parser
 * (parse_options_step, parse_revision_opt) and to usage_with_options().
 */
static char const * const shortlog_usage[] = {
        N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
        N_("git log --pretty=short | git shortlog [<options>]"),
        NULL
};
19
/*
 * The util field of our string_list_items will contain one of two things:
 *
 *   - if --summary is not in use, it will point to a string list of the
 *     oneline subjects assigned to this author
 *
 *   - if --summary is in use, we don't need that list; we only need to know
 *     its size. So we abuse the pointer slot to store our integer counter.
 *
 * This macro accesses the latter: given a pointer "x" to a
 * string_list_item, it reads the util slot back as an intptr_t counter.
 */
#define UTIL_TO_INT(x) ((intptr_t)(x)->util)
32
/*
 * Comparator for sorting the shortlog list in --summary mode, where each
 * item's util slot holds a commit counter (see UTIL_TO_INT).
 *
 * Returns a positive value when a1 has the smaller count, a negative value
 * when it has the larger one, and 0 on a tie, so that sorting yields a
 * descending order by count.
 */
static int compare_by_counter(const void *a1, const void *a2)
{
        const struct string_list_item *i1 = a1, *i2 = a2;
        intptr_t n1 = UTIL_TO_INT(i1);
        intptr_t n2 = UTIL_TO_INT(i2);

        /*
         * Compare explicitly instead of returning n2 - n1: the difference
         * of two intptr_t values does not necessarily fit in an int, and
         * the implicit narrowing could flip its sign.
         */
        if (n1 < n2)
                return 1;
        if (n1 > n2)
                return -1;
        return 0;
}
38
39 static int compare_by_list(const void *a1, const void *a2)
40 {
41         const struct string_list_item *i1 = a1, *i2 = a2;
42         const struct string_list *l1 = i1->util, *l2 = i2->util;
43
44         if (l1->nr < l2->nr)
45                 return 1;
46         else if (l1->nr == l2->nr)
47                 return 0;
48         else
49                 return -1;
50 }
51
52 static void insert_one_record(struct shortlog *log,
53                               const char *ident,
54                               const char *oneline)
55 {
56         struct string_list_item *item;
57
58         item = string_list_insert(&log->list, ident);
59
60         if (log->summary)
61                 item->util = (void *)(UTIL_TO_INT(item) + 1);
62         else {
63                 const char *dot3 = log->common_repo_prefix;
64                 char *buffer, *p;
65                 struct strbuf subject = STRBUF_INIT;
66                 const char *eol;
67
68                 /* Skip any leading whitespace, including any blank lines. */
69                 while (*oneline && isspace(*oneline))
70                         oneline++;
71                 eol = strchr(oneline, '\n');
72                 if (!eol)
73                         eol = oneline + strlen(oneline);
74                 if (starts_with(oneline, "[PATCH")) {
75                         char *eob = strchr(oneline, ']');
76                         if (eob && (!eol || eob < eol))
77                                 oneline = eob + 1;
78                 }
79                 while (*oneline && isspace(*oneline) && *oneline != '\n')
80                         oneline++;
81                 format_subject(&subject, oneline, " ");
82                 buffer = strbuf_detach(&subject, NULL);
83
84                 if (dot3) {
85                         int dot3len = strlen(dot3);
86                         if (dot3len > 5) {
87                                 while ((p = strstr(buffer, dot3)) != NULL) {
88                                         int taillen = strlen(p) - dot3len;
89                                         memcpy(p, "/.../", 5);
90                                         memmove(p + 5, p + dot3len, taillen + 1);
91                                 }
92                         }
93                 }
94
95                 if (item->util == NULL)
96                         item->util = xcalloc(1, sizeof(struct string_list));
97                 string_list_append(item->util, buffer);
98         }
99 }
100
101 static int parse_ident(struct shortlog *log,
102                        struct strbuf *out, const char *in)
103 {
104         const char *mailbuf, *namebuf;
105         size_t namelen, maillen;
106         struct ident_split ident;
107
108         if (split_ident_line(&ident, in, strlen(in)))
109                 return -1;
110
111         namebuf = ident.name_begin;
112         mailbuf = ident.mail_begin;
113         namelen = ident.name_end - ident.name_begin;
114         maillen = ident.mail_end - ident.mail_begin;
115
116         map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
117         strbuf_add(out, namebuf, namelen);
118         if (log->email)
119                 strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
120
121         return 0;
122 }
123
124 static void read_from_stdin(struct shortlog *log)
125 {
126         struct strbuf ident = STRBUF_INIT;
127         struct strbuf mapped_ident = STRBUF_INIT;
128         struct strbuf oneline = STRBUF_INIT;
129         static const char *author_match[2] = { "Author: ", "author " };
130         static const char *committer_match[2] = { "Commit: ", "committer " };
131         const char **match;
132
133         if (HAS_MULTI_BITS(log->groups))
134                 die(_("using multiple --group options with stdin is not supported"));
135
136         switch (log->groups) {
137         case SHORTLOG_GROUP_AUTHOR:
138                 match = author_match;
139                 break;
140         case SHORTLOG_GROUP_COMMITTER:
141                 match = committer_match;
142                 break;
143         case SHORTLOG_GROUP_TRAILER:
144                 die(_("using --group=trailer with stdin is not supported"));
145         default:
146                 BUG("unhandled shortlog group");
147         }
148
149         while (strbuf_getline_lf(&ident, stdin) != EOF) {
150                 const char *v;
151                 if (!skip_prefix(ident.buf, match[0], &v) &&
152                     !skip_prefix(ident.buf, match[1], &v))
153                         continue;
154                 while (strbuf_getline_lf(&oneline, stdin) != EOF &&
155                        oneline.len)
156                         ; /* discard headers */
157                 while (strbuf_getline_lf(&oneline, stdin) != EOF &&
158                        !oneline.len)
159                         ; /* discard blanks */
160
161                 strbuf_reset(&mapped_ident);
162                 if (parse_ident(log, &mapped_ident, v) < 0)
163                         continue;
164
165                 insert_one_record(log, mapped_ident.buf, oneline.buf);
166         }
167         strbuf_release(&ident);
168         strbuf_release(&mapped_ident);
169         strbuf_release(&oneline);
170 }
171
/*
 * One member of a strset: the hashmap bookkeeping entry followed by the
 * string itself, stored inline in a flexible array and compared with
 * strcmp().
 */
struct strset_item {
        struct hashmap_entry ent;
        char value[FLEX_ARRAY];
};

/* A set of strings, implemented as a hashmap of strset_item entries. */
struct strset {
        struct hashmap map;
};

/*
 * Static initializer that leaves the map zeroed; strset_check_and_add()
 * sets the hashmap up lazily when it sees a NULL table.
 */
#define STRSET_INIT { { NULL } }
182
183 static int strset_item_hashcmp(const void *hash_data,
184                                const struct hashmap_entry *entry,
185                                const struct hashmap_entry *entry_or_key,
186                                const void *keydata)
187 {
188         const struct strset_item *a, *b;
189
190         a = container_of(entry, const struct strset_item, ent);
191         if (keydata)
192                 return strcmp(a->value, keydata);
193
194         b = container_of(entry_or_key, const struct strset_item, ent);
195         return strcmp(a->value, b->value);
196 }
197
198 /*
199  * Adds "str" to the set if it was not already present; returns true if it was
200  * already there.
201  */
202 static int strset_check_and_add(struct strset *ss, const char *str)
203 {
204         unsigned int hash = strhash(str);
205         struct strset_item *item;
206
207         if (!ss->map.table)
208                 hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
209
210         if (hashmap_get_from_hash(&ss->map, hash, str))
211                 return 1;
212
213         FLEX_ALLOC_STR(item, value, str);
214         hashmap_entry_init(&item->ent, hash);
215         hashmap_add(&ss->map, &item->ent);
216         return 0;
217 }
218
219 static void strset_clear(struct strset *ss)
220 {
221         if (!ss->map.table)
222                 return;
223         hashmap_free_entries(&ss->map, struct strset_item, ent);
224 }
225
226 static void insert_records_from_trailers(struct shortlog *log,
227                                          struct strset *dups,
228                                          struct commit *commit,
229                                          struct pretty_print_context *ctx,
230                                          const char *oneline)
231 {
232         struct trailer_iterator iter;
233         const char *commit_buffer, *body;
234         struct strbuf ident = STRBUF_INIT;
235
236         /*
237          * Using format_commit_message("%B") would be simpler here, but
238          * this saves us copying the message.
239          */
240         commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
241         body = strstr(commit_buffer, "\n\n");
242         if (!body)
243                 return;
244
245         trailer_iterator_init(&iter, body);
246         while (trailer_iterator_advance(&iter)) {
247                 const char *value = iter.val.buf;
248
249                 if (!string_list_has_string(&log->trailers, iter.key.buf))
250                         continue;
251
252                 strbuf_reset(&ident);
253                 if (!parse_ident(log, &ident, value))
254                         value = ident.buf;
255
256                 if (strset_check_and_add(dups, value))
257                         continue;
258                 insert_one_record(log, value, oneline);
259         }
260         trailer_iterator_release(&iter);
261
262         strbuf_release(&ident);
263         unuse_commit_buffer(commit, commit_buffer);
264 }
265
266 void shortlog_add_commit(struct shortlog *log, struct commit *commit)
267 {
268         struct strbuf ident = STRBUF_INIT;
269         struct strbuf oneline = STRBUF_INIT;
270         struct strset dups = STRSET_INIT;
271         struct pretty_print_context ctx = {0};
272         const char *oneline_str;
273
274         ctx.fmt = CMIT_FMT_USERFORMAT;
275         ctx.abbrev = log->abbrev;
276         ctx.print_email_subject = 1;
277         ctx.date_mode.type = DATE_NORMAL;
278         ctx.output_encoding = get_log_output_encoding();
279
280         if (!log->summary) {
281                 if (log->user_format)
282                         pretty_print_commit(&ctx, commit, &oneline);
283                 else
284                         format_commit_message(commit, "%s", &oneline, &ctx);
285         }
286         oneline_str = oneline.len ? oneline.buf : "<none>";
287
288         if (log->groups & SHORTLOG_GROUP_AUTHOR) {
289                 strbuf_reset(&ident);
290                 format_commit_message(commit,
291                                       log->email ? "%aN <%aE>" : "%aN",
292                                       &ident, &ctx);
293                 if (!HAS_MULTI_BITS(log->groups) ||
294                     !strset_check_and_add(&dups, ident.buf))
295                         insert_one_record(log, ident.buf, oneline_str);
296         }
297         if (log->groups & SHORTLOG_GROUP_COMMITTER) {
298                 strbuf_reset(&ident);
299                 format_commit_message(commit,
300                                       log->email ? "%cN <%cE>" : "%cN",
301                                       &ident, &ctx);
302                 if (!HAS_MULTI_BITS(log->groups) ||
303                     !strset_check_and_add(&dups, ident.buf))
304                         insert_one_record(log, ident.buf, oneline_str);
305         }
306         if (log->groups & SHORTLOG_GROUP_TRAILER) {
307                 insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
308         }
309
310         strset_clear(&dups);
311         strbuf_release(&ident);
312         strbuf_release(&oneline);
313 }
314
315 static void get_from_rev(struct rev_info *rev, struct shortlog *log)
316 {
317         struct commit *commit;
318
319         if (prepare_revision_walk(rev))
320                 die(_("revision walk setup failed"));
321         while ((commit = get_revision(rev)) != NULL)
322                 shortlog_add_commit(log, commit);
323 }
324
/*
 * Parse one non-negative decimal field from the string at *arg.
 *
 * The field must be followed either by the end of the string or by the
 * separator character "comma". On success *arg is advanced past the
 * field (and past the separator, if one was found) and the parsed value
 * is returned; an empty field yields "defval". On a malformed field or a
 * value above INT_MAX, -1 is returned and *arg is left unchanged.
 */
static int parse_uint(char const **arg, int comma, int defval)
{
        const char *start = *arg;
        char *stop;
        unsigned long parsed = strtoul(start, &stop, 10);

        if ((*stop != '\0' && *stop != comma) || parsed > INT_MAX)
                return -1;

        /* Step over the separator, but never past the terminating NUL. */
        *arg = (*stop != '\0') ? stop + 1 : stop;

        /* No characters consumed means the field was left empty. */
        return (stop == start) ? defval : (int)parsed;
}
340
/* Message reported by parse_wrap_args() when the -w argument is rejected. */
static const char wrap_arg_usage[] = "-w[<width>[,<indent1>[,<indent2>]]]";
/*
 * Default wrap width and indents, used by shortlog_init() and whenever
 * -w is given without a value or with a field left empty.
 */
#define DEFAULT_WRAPLEN 76
#define DEFAULT_INDENT1 6
#define DEFAULT_INDENT2 9
345
346 static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
347 {
348         struct shortlog *log = opt->value;
349
350         log->wrap_lines = !unset;
351         if (unset)
352                 return 0;
353         if (!arg) {
354                 log->wrap = DEFAULT_WRAPLEN;
355                 log->in1 = DEFAULT_INDENT1;
356                 log->in2 = DEFAULT_INDENT2;
357                 return 0;
358         }
359
360         log->wrap = parse_uint(&arg, ',', DEFAULT_WRAPLEN);
361         log->in1 = parse_uint(&arg, ',', DEFAULT_INDENT1);
362         log->in2 = parse_uint(&arg, '\0', DEFAULT_INDENT2);
363         if (log->wrap < 0 || log->in1 < 0 || log->in2 < 0)
364                 return error(wrap_arg_usage);
365         if (log->wrap &&
366             ((log->in1 && log->wrap <= log->in1) ||
367              (log->in2 && log->wrap <= log->in2)))
368                 return error(wrap_arg_usage);
369         return 0;
370 }
371
372 static int parse_group_option(const struct option *opt, const char *arg, int unset)
373 {
374         struct shortlog *log = opt->value;
375         const char *field;
376
377         if (unset) {
378                 log->groups = 0;
379                 string_list_clear(&log->trailers, 0);
380         } else if (!strcasecmp(arg, "author"))
381                 log->groups |= SHORTLOG_GROUP_AUTHOR;
382         else if (!strcasecmp(arg, "committer"))
383                 log->groups |= SHORTLOG_GROUP_COMMITTER;
384         else if (skip_prefix(arg, "trailer:", &field)) {
385                 log->groups |= SHORTLOG_GROUP_TRAILER;
386                 string_list_append(&log->trailers, field);
387         } else
388                 return error(_("unknown group type: %s"), arg);
389
390         return 0;
391 }
392
393
394 void shortlog_init(struct shortlog *log)
395 {
396         memset(log, 0, sizeof(*log));
397
398         read_mailmap(&log->mailmap, &log->common_repo_prefix);
399
400         log->list.strdup_strings = 1;
401         log->wrap = DEFAULT_WRAPLEN;
402         log->in1 = DEFAULT_INDENT1;
403         log->in2 = DEFAULT_INDENT2;
404         log->trailers.strdup_strings = 1;
405         log->trailers.cmp = strcasecmp;
406 }
407
408 int cmd_shortlog(int argc, const char **argv, const char *prefix)
409 {
410         struct shortlog log = { STRING_LIST_INIT_NODUP };
411         struct rev_info rev;
412         int nongit = !startup_info->have_repository;
413
414         const struct option options[] = {
415                 OPT_BIT('c', "committer", &log.groups,
416                         N_("Group by committer rather than author"),
417                         SHORTLOG_GROUP_COMMITTER),
418                 OPT_BOOL('n', "numbered", &log.sort_by_number,
419                          N_("sort output according to the number of commits per author")),
420                 OPT_BOOL('s', "summary", &log.summary,
421                          N_("Suppress commit descriptions, only provides commit count")),
422                 OPT_BOOL('e', "email", &log.email,
423                          N_("Show the email address of each author")),
424                 OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
425                         N_("Linewrap output"), PARSE_OPT_OPTARG,
426                         &parse_wrap_args),
427                 OPT_CALLBACK(0, "group", &log, N_("field"),
428                         N_("Group by field"), parse_group_option),
429                 OPT_END(),
430         };
431
432         struct parse_opt_ctx_t ctx;
433
434         git_config(git_default_config, NULL);
435         shortlog_init(&log);
436         repo_init_revisions(the_repository, &rev, prefix);
437         parse_options_start(&ctx, argc, argv, prefix, options,
438                             PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
439
440         for (;;) {
441                 switch (parse_options_step(&ctx, options, shortlog_usage)) {
442                 case PARSE_OPT_HELP:
443                 case PARSE_OPT_ERROR:
444                         exit(129);
445                 case PARSE_OPT_COMPLETE:
446                         exit(0);
447                 case PARSE_OPT_DONE:
448                         goto parse_done;
449                 }
450                 parse_revision_opt(&rev, &ctx, options, shortlog_usage);
451         }
452 parse_done:
453         argc = parse_options_end(&ctx);
454
455         if (nongit && argc > 1) {
456                 error(_("too many arguments given outside repository"));
457                 usage_with_options(shortlog_usage, options);
458         }
459
460         if (setup_revisions(argc, argv, &rev, NULL) != 1) {
461                 error(_("unrecognized argument: %s"), argv[1]);
462                 usage_with_options(shortlog_usage, options);
463         }
464
465         log.user_format = rev.commit_format == CMIT_FMT_USERFORMAT;
466         log.abbrev = rev.abbrev;
467         log.file = rev.diffopt.file;
468
469         if (!log.groups)
470                 log.groups = SHORTLOG_GROUP_AUTHOR;
471         string_list_sort(&log.trailers);
472
473         /* assume HEAD if from a tty */
474         if (!nongit && !rev.pending.nr && isatty(0))
475                 add_head_to_pending(&rev);
476         if (rev.pending.nr == 0) {
477                 if (isatty(0))
478                         fprintf(stderr, _("(reading log message from standard input)\n"));
479                 read_from_stdin(&log);
480         }
481         else
482                 get_from_rev(&rev, &log);
483
484         shortlog_output(&log);
485         if (log.file != stdout)
486                 fclose(log.file);
487         return 0;
488 }
489
490 static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
491                                      const struct shortlog *log)
492 {
493         strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap);
494         strbuf_addch(sb, '\n');
495 }
496
497 void shortlog_output(struct shortlog *log)
498 {
499         int i, j;
500         struct strbuf sb = STRBUF_INIT;
501
502         if (log->sort_by_number)
503                 QSORT(log->list.items, log->list.nr,
504                       log->summary ? compare_by_counter : compare_by_list);
505         for (i = 0; i < log->list.nr; i++) {
506                 const struct string_list_item *item = &log->list.items[i];
507                 if (log->summary) {
508                         fprintf(log->file, "%6d\t%s\n",
509                                 (int)UTIL_TO_INT(item), item->string);
510                 } else {
511                         struct string_list *onelines = item->util;
512                         fprintf(log->file, "%s (%d):\n",
513                                 item->string, onelines->nr);
514                         for (j = onelines->nr - 1; j >= 0; j--) {
515                                 const char *msg = onelines->items[j].string;
516
517                                 if (log->wrap_lines) {
518                                         strbuf_reset(&sb);
519                                         add_wrapped_shortlog_msg(&sb, msg, log);
520                                         fwrite(sb.buf, sb.len, 1, log->file);
521                                 }
522                                 else
523                                         fprintf(log->file, "      %s\n", msg);
524                         }
525                         putc('\n', log->file);
526                         onelines->strdup_strings = 1;
527                         string_list_clear(onelines, 0);
528                         free(onelines);
529                 }
530
531                 log->list.items[i].util = NULL;
532         }
533
534         strbuf_release(&sb);
535         log->list.strdup_strings = 1;
536         string_list_clear(&log->list, 1);
537         clear_mailmap(&log->mailmap);
538 }