commit: add repository argument to set_commit_buffer
[git] / commit.c
1 #include "cache.h"
2 #include "tag.h"
3 #include "commit.h"
4 #include "commit-graph.h"
5 #include "repository.h"
6 #include "object-store.h"
7 #include "pkt-line.h"
8 #include "utf8.h"
9 #include "diff.h"
10 #include "revision.h"
11 #include "notes.h"
12 #include "alloc.h"
13 #include "gpg-interface.h"
14 #include "mergesort.h"
15 #include "commit-slab.h"
16 #include "prio-queue.h"
17 #include "sha1-lookup.h"
18 #include "wt-status.h"
19 #include "advice.h"
20
21 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
22
23 int save_commit_buffer = 1;
24
25 const char *commit_type = "commit";
26
27 struct commit *lookup_commit_reference_gently_the_repository(
28                 const struct object_id *oid, int quiet)
29 {
30         struct object *obj = deref_tag(parse_object(the_repository, oid),
31                                        NULL, 0);
32
33         if (!obj)
34                 return NULL;
35         return object_as_type(the_repository, obj, OBJ_COMMIT, quiet);
36 }
37
38 struct commit *lookup_commit_reference_the_repository(const struct object_id *oid)
39 {
40         return lookup_commit_reference_gently(the_repository, oid, 0);
41 }
42
43 struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name)
44 {
45         struct commit *c = lookup_commit_reference(the_repository, oid);
46         if (!c)
47                 die(_("could not parse %s"), ref_name);
48         if (oidcmp(oid, &c->object.oid)) {
49                 warning(_("%s %s is not a commit!"),
50                         ref_name, oid_to_hex(oid));
51         }
52         return c;
53 }
54
55 struct commit *lookup_commit_the_repository(const struct object_id *oid)
56 {
57         struct object *obj = lookup_object(the_repository, oid->hash);
58         if (!obj)
59                 return create_object(the_repository, oid->hash,
60                                      alloc_commit_node(the_repository));
61         return object_as_type(the_repository, obj, OBJ_COMMIT, 0);
62 }
63
64 struct commit *lookup_commit_reference_by_name(const char *name)
65 {
66         struct object_id oid;
67         struct commit *commit;
68
69         if (get_oid_committish(name, &oid))
70                 return NULL;
71         commit = lookup_commit_reference(the_repository, &oid);
72         if (parse_commit(commit))
73                 return NULL;
74         return commit;
75 }
76
77 static timestamp_t parse_commit_date(const char *buf, const char *tail)
78 {
79         const char *dateptr;
80
81         if (buf + 6 >= tail)
82                 return 0;
83         if (memcmp(buf, "author", 6))
84                 return 0;
85         while (buf < tail && *buf++ != '\n')
86                 /* nada */;
87         if (buf + 9 >= tail)
88                 return 0;
89         if (memcmp(buf, "committer", 9))
90                 return 0;
91         while (buf < tail && *buf++ != '>')
92                 /* nada */;
93         if (buf >= tail)
94                 return 0;
95         dateptr = buf;
96         while (buf < tail && *buf++ != '\n')
97                 /* nada */;
98         if (buf >= tail)
99                 return 0;
100         /* dateptr < buf && buf[-1] == '\n', so parsing will stop at buf-1 */
101         return parse_timestamp(dateptr, NULL, 10);
102 }
103
104 static const unsigned char *commit_graft_sha1_access(size_t index, void *table)
105 {
106         struct commit_graft **commit_graft_table = table;
107         return commit_graft_table[index]->oid.hash;
108 }
109
110 static int commit_graft_pos(struct repository *r, const unsigned char *sha1)
111 {
112         return sha1_pos(sha1, r->parsed_objects->grafts,
113                         r->parsed_objects->grafts_nr,
114                         commit_graft_sha1_access);
115 }
116
117 int register_commit_graft(struct repository *r, struct commit_graft *graft,
118                           int ignore_dups)
119 {
120         int pos = commit_graft_pos(r, graft->oid.hash);
121
122         if (0 <= pos) {
123                 if (ignore_dups)
124                         free(graft);
125                 else {
126                         free(r->parsed_objects->grafts[pos]);
127                         r->parsed_objects->grafts[pos] = graft;
128                 }
129                 return 1;
130         }
131         pos = -pos - 1;
132         ALLOC_GROW(r->parsed_objects->grafts,
133                    r->parsed_objects->grafts_nr + 1,
134                    r->parsed_objects->grafts_alloc);
135         r->parsed_objects->grafts_nr++;
136         if (pos < r->parsed_objects->grafts_nr)
137                 memmove(r->parsed_objects->grafts + pos + 1,
138                         r->parsed_objects->grafts + pos,
139                         (r->parsed_objects->grafts_nr - pos - 1) *
140                         sizeof(*r->parsed_objects->grafts));
141         r->parsed_objects->grafts[pos] = graft;
142         return 0;
143 }
144
145 struct commit_graft *read_graft_line(struct strbuf *line)
146 {
147         /* The format is just "Commit Parent1 Parent2 ...\n" */
148         int i, phase;
149         const char *tail = NULL;
150         struct commit_graft *graft = NULL;
151         struct object_id dummy_oid, *oid;
152
153         strbuf_rtrim(line);
154         if (!line->len || line->buf[0] == '#')
155                 return NULL;
156         /*
157          * phase 0 verifies line, counts hashes in line and allocates graft
158          * phase 1 fills graft
159          */
160         for (phase = 0; phase < 2; phase++) {
161                 oid = graft ? &graft->oid : &dummy_oid;
162                 if (parse_oid_hex(line->buf, oid, &tail))
163                         goto bad_graft_data;
164                 for (i = 0; *tail != '\0'; i++) {
165                         oid = graft ? &graft->parent[i] : &dummy_oid;
166                         if (!isspace(*tail++) || parse_oid_hex(tail, oid, &tail))
167                                 goto bad_graft_data;
168                 }
169                 if (!graft) {
170                         graft = xmalloc(st_add(sizeof(*graft),
171                                                st_mult(sizeof(struct object_id), i)));
172                         graft->nr_parent = i;
173                 }
174         }
175         return graft;
176
177 bad_graft_data:
178         error("bad graft data: %s", line->buf);
179         assert(!graft);
180         return NULL;
181 }
182
183 static int read_graft_file(struct repository *r, const char *graft_file)
184 {
185         FILE *fp = fopen_or_warn(graft_file, "r");
186         struct strbuf buf = STRBUF_INIT;
187         if (!fp)
188                 return -1;
189         if (advice_graft_file_deprecated)
190                 advise(_("Support for <GIT_DIR>/info/grafts is deprecated\n"
191                          "and will be removed in a future Git version.\n"
192                          "\n"
193                          "Please use \"git replace --convert-graft-file\"\n"
194                          "to convert the grafts into replace refs.\n"
195                          "\n"
196                          "Turn this message off by running\n"
197                          "\"git config advice.graftFileDeprecated false\""));
198         while (!strbuf_getwholeline(&buf, fp, '\n')) {
199                 /* The format is just "Commit Parent1 Parent2 ...\n" */
200                 struct commit_graft *graft = read_graft_line(&buf);
201                 if (!graft)
202                         continue;
203                 if (register_commit_graft(r, graft, 1))
204                         error("duplicate graft data: %s", buf.buf);
205         }
206         fclose(fp);
207         strbuf_release(&buf);
208         return 0;
209 }
210
211 static void prepare_commit_graft(struct repository *r)
212 {
213         char *graft_file;
214
215         if (r->parsed_objects->commit_graft_prepared)
216                 return;
217         if (!startup_info->have_repository)
218                 return;
219
220         graft_file = get_graft_file(r);
221         read_graft_file(r, graft_file);
222         /* make sure shallows are read */
223         is_repository_shallow(r);
224         r->parsed_objects->commit_graft_prepared = 1;
225 }
226
227 struct commit_graft *lookup_commit_graft(struct repository *r, const struct object_id *oid)
228 {
229         int pos;
230         prepare_commit_graft(r);
231         pos = commit_graft_pos(r, oid->hash);
232         if (pos < 0)
233                 return NULL;
234         return r->parsed_objects->grafts[pos];
235 }
236
237 int for_each_commit_graft(each_commit_graft_fn fn, void *cb_data)
238 {
239         int i, ret;
240         for (i = ret = 0; i < the_repository->parsed_objects->grafts_nr && !ret; i++)
241                 ret = fn(the_repository->parsed_objects->grafts[i], cb_data);
242         return ret;
243 }
244
245 int unregister_shallow(const struct object_id *oid)
246 {
247         int pos = commit_graft_pos(the_repository, oid->hash);
248         if (pos < 0)
249                 return -1;
250         if (pos + 1 < the_repository->parsed_objects->grafts_nr)
251                 MOVE_ARRAY(the_repository->parsed_objects->grafts + pos,
252                            the_repository->parsed_objects->grafts + pos + 1,
253                            the_repository->parsed_objects->grafts_nr - pos - 1);
254         the_repository->parsed_objects->grafts_nr--;
255         return 0;
256 }
257
258 struct commit_buffer {
259         void *buffer;
260         unsigned long size;
261 };
262 define_commit_slab(buffer_slab, struct commit_buffer);
263 static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab);
264
265 void set_commit_buffer_the_repository(struct commit *commit, void *buffer, unsigned long size)
266 {
267         struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
268         v->buffer = buffer;
269         v->size = size;
270 }
271
272 const void *get_cached_commit_buffer(const struct commit *commit, unsigned long *sizep)
273 {
274         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
275         if (!v) {
276                 if (sizep)
277                         *sizep = 0;
278                 return NULL;
279         }
280         if (sizep)
281                 *sizep = v->size;
282         return v->buffer;
283 }
284
285 const void *get_commit_buffer(const struct commit *commit, unsigned long *sizep)
286 {
287         const void *ret = get_cached_commit_buffer(commit, sizep);
288         if (!ret) {
289                 enum object_type type;
290                 unsigned long size;
291                 ret = read_object_file(&commit->object.oid, &type, &size);
292                 if (!ret)
293                         die("cannot read commit object %s",
294                             oid_to_hex(&commit->object.oid));
295                 if (type != OBJ_COMMIT)
296                         die("expected commit for %s, got %s",
297                             oid_to_hex(&commit->object.oid), type_name(type));
298                 if (sizep)
299                         *sizep = size;
300         }
301         return ret;
302 }
303
304 void unuse_commit_buffer(const struct commit *commit, const void *buffer)
305 {
306         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
307         if (!(v && v->buffer == buffer))
308                 free((void *)buffer);
309 }
310
311 void free_commit_buffer(struct commit *commit)
312 {
313         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
314         if (v) {
315                 FREE_AND_NULL(v->buffer);
316                 v->size = 0;
317         }
318 }
319
320 struct tree *get_commit_tree(const struct commit *commit)
321 {
322         if (commit->maybe_tree || !commit->object.parsed)
323                 return commit->maybe_tree;
324
325         if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
326                 BUG("commit has NULL tree, but was not loaded from commit-graph");
327
328         return get_commit_tree_in_graph(commit);
329 }
330
331 struct object_id *get_commit_tree_oid(const struct commit *commit)
332 {
333         return &get_commit_tree(commit)->object.oid;
334 }
335
336 void release_commit_memory(struct commit *c)
337 {
338         c->maybe_tree = NULL;
339         c->index = 0;
340         free_commit_buffer(c);
341         free_commit_list(c->parents);
342         /* TODO: what about commit->util? */
343
344         c->object.parsed = 0;
345 }
346
347 const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
348 {
349         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
350         void *ret;
351
352         if (!v) {
353                 if (sizep)
354                         *sizep = 0;
355                 return NULL;
356         }
357         ret = v->buffer;
358         if (sizep)
359                 *sizep = v->size;
360
361         v->buffer = NULL;
362         v->size = 0;
363         return ret;
364 }
365
366 int parse_commit_buffer_the_repository(struct commit *item, const void *buffer, unsigned long size, int check_graph)
367 {
368         const char *tail = buffer;
369         const char *bufptr = buffer;
370         struct object_id parent;
371         struct commit_list **pptr;
372         struct commit_graft *graft;
373         const int tree_entry_len = GIT_SHA1_HEXSZ + 5;
374         const int parent_entry_len = GIT_SHA1_HEXSZ + 7;
375
376         if (item->object.parsed)
377                 return 0;
378         item->object.parsed = 1;
379         tail += size;
380         if (tail <= bufptr + tree_entry_len + 1 || memcmp(bufptr, "tree ", 5) ||
381                         bufptr[tree_entry_len] != '\n')
382                 return error("bogus commit object %s", oid_to_hex(&item->object.oid));
383         if (get_oid_hex(bufptr + 5, &parent) < 0)
384                 return error("bad tree pointer in commit %s",
385                              oid_to_hex(&item->object.oid));
386         item->maybe_tree = lookup_tree(the_repository, &parent);
387         bufptr += tree_entry_len + 1; /* "tree " + "hex sha1" + "\n" */
388         pptr = &item->parents;
389
390         graft = lookup_commit_graft(the_repository, &item->object.oid);
391         while (bufptr + parent_entry_len < tail && !memcmp(bufptr, "parent ", 7)) {
392                 struct commit *new_parent;
393
394                 if (tail <= bufptr + parent_entry_len + 1 ||
395                     get_oid_hex(bufptr + 7, &parent) ||
396                     bufptr[parent_entry_len] != '\n')
397                         return error("bad parents in commit %s", oid_to_hex(&item->object.oid));
398                 bufptr += parent_entry_len + 1;
399                 /*
400                  * The clone is shallow if nr_parent < 0, and we must
401                  * not traverse its real parents even when we unhide them.
402                  */
403                 if (graft && (graft->nr_parent < 0 || grafts_replace_parents))
404                         continue;
405                 new_parent = lookup_commit(the_repository, &parent);
406                 if (new_parent)
407                         pptr = &commit_list_insert(new_parent, pptr)->next;
408         }
409         if (graft) {
410                 int i;
411                 struct commit *new_parent;
412                 for (i = 0; i < graft->nr_parent; i++) {
413                         new_parent = lookup_commit(the_repository,
414                                                    &graft->parent[i]);
415                         if (!new_parent)
416                                 continue;
417                         pptr = &commit_list_insert(new_parent, pptr)->next;
418                 }
419         }
420         item->date = parse_commit_date(bufptr, tail);
421
422         if (check_graph)
423                 load_commit_graph_info(item);
424
425         return 0;
426 }
427
428 int parse_commit_gently(struct commit *item, int quiet_on_missing)
429 {
430         enum object_type type;
431         void *buffer;
432         unsigned long size;
433         int ret;
434
435         if (!item)
436                 return -1;
437         if (item->object.parsed)
438                 return 0;
439         if (parse_commit_in_graph(item))
440                 return 0;
441         buffer = read_object_file(&item->object.oid, &type, &size);
442         if (!buffer)
443                 return quiet_on_missing ? -1 :
444                         error("Could not read %s",
445                              oid_to_hex(&item->object.oid));
446         if (type != OBJ_COMMIT) {
447                 free(buffer);
448                 return error("Object %s not a commit",
449                              oid_to_hex(&item->object.oid));
450         }
451         ret = parse_commit_buffer(the_repository, item, buffer, size, 0);
452         if (save_commit_buffer && !ret) {
453                 set_commit_buffer(the_repository, item, buffer, size);
454                 return 0;
455         }
456         free(buffer);
457         return ret;
458 }
459
460 void parse_commit_or_die(struct commit *item)
461 {
462         if (parse_commit(item))
463                 die("unable to parse commit %s",
464                     item ? oid_to_hex(&item->object.oid) : "(null)");
465 }
466
467 int find_commit_subject(const char *commit_buffer, const char **subject)
468 {
469         const char *eol;
470         const char *p = commit_buffer;
471
472         while (*p && (*p != '\n' || p[1] != '\n'))
473                 p++;
474         if (*p) {
475                 p = skip_blank_lines(p + 2);
476                 eol = strchrnul(p, '\n');
477         } else
478                 eol = p;
479
480         *subject = p;
481
482         return eol - p;
483 }
484
485 struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p)
486 {
487         struct commit_list *new_list = xmalloc(sizeof(struct commit_list));
488         new_list->item = item;
489         new_list->next = *list_p;
490         *list_p = new_list;
491         return new_list;
492 }
493
494 unsigned commit_list_count(const struct commit_list *l)
495 {
496         unsigned c = 0;
497         for (; l; l = l->next )
498                 c++;
499         return c;
500 }
501
502 struct commit_list *copy_commit_list(struct commit_list *list)
503 {
504         struct commit_list *head = NULL;
505         struct commit_list **pp = &head;
506         while (list) {
507                 pp = commit_list_append(list->item, pp);
508                 list = list->next;
509         }
510         return head;
511 }
512
513 void free_commit_list(struct commit_list *list)
514 {
515         while (list)
516                 pop_commit(&list);
517 }
518
519 struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
520 {
521         struct commit_list **pp = list;
522         struct commit_list *p;
523         while ((p = *pp) != NULL) {
524                 if (p->item->date < item->date) {
525                         break;
526                 }
527                 pp = &p->next;
528         }
529         return commit_list_insert(item, pp);
530 }
531
532 static int commit_list_compare_by_date(const void *a, const void *b)
533 {
534         timestamp_t a_date = ((const struct commit_list *)a)->item->date;
535         timestamp_t b_date = ((const struct commit_list *)b)->item->date;
536         if (a_date < b_date)
537                 return 1;
538         if (a_date > b_date)
539                 return -1;
540         return 0;
541 }
542
543 static void *commit_list_get_next(const void *a)
544 {
545         return ((const struct commit_list *)a)->next;
546 }
547
548 static void commit_list_set_next(void *a, void *next)
549 {
550         ((struct commit_list *)a)->next = next;
551 }
552
553 void commit_list_sort_by_date(struct commit_list **list)
554 {
555         *list = llist_mergesort(*list, commit_list_get_next, commit_list_set_next,
556                                 commit_list_compare_by_date);
557 }
558
559 struct commit *pop_most_recent_commit(struct commit_list **list,
560                                       unsigned int mark)
561 {
562         struct commit *ret = pop_commit(list);
563         struct commit_list *parents = ret->parents;
564
565         while (parents) {
566                 struct commit *commit = parents->item;
567                 if (!parse_commit(commit) && !(commit->object.flags & mark)) {
568                         commit->object.flags |= mark;
569                         commit_list_insert_by_date(commit, list);
570                 }
571                 parents = parents->next;
572         }
573         return ret;
574 }
575
576 static void clear_commit_marks_1(struct commit_list **plist,
577                                  struct commit *commit, unsigned int mark)
578 {
579         while (commit) {
580                 struct commit_list *parents;
581
582                 if (!(mark & commit->object.flags))
583                         return;
584
585                 commit->object.flags &= ~mark;
586
587                 parents = commit->parents;
588                 if (!parents)
589                         return;
590
591                 while ((parents = parents->next))
592                         commit_list_insert(parents->item, plist);
593
594                 commit = commit->parents->item;
595         }
596 }
597
598 void clear_commit_marks_many(int nr, struct commit **commit, unsigned int mark)
599 {
600         struct commit_list *list = NULL;
601
602         while (nr--) {
603                 clear_commit_marks_1(&list, *commit, mark);
604                 commit++;
605         }
606         while (list)
607                 clear_commit_marks_1(&list, pop_commit(&list), mark);
608 }
609
610 void clear_commit_marks(struct commit *commit, unsigned int mark)
611 {
612         clear_commit_marks_many(1, &commit, mark);
613 }
614
615 struct commit *pop_commit(struct commit_list **stack)
616 {
617         struct commit_list *top = *stack;
618         struct commit *item = top ? top->item : NULL;
619
620         if (top) {
621                 *stack = top->next;
622                 free(top);
623         }
624         return item;
625 }
626
627 /*
628  * Topological sort support
629  */
630
631 /* count number of children that have not been emitted */
632 define_commit_slab(indegree_slab, int);
633
634 /* record author-date for each commit object */
635 define_commit_slab(author_date_slab, unsigned long);
636
637 static void record_author_date(struct author_date_slab *author_date,
638                                struct commit *commit)
639 {
640         const char *buffer = get_commit_buffer(commit, NULL);
641         struct ident_split ident;
642         const char *ident_line;
643         size_t ident_len;
644         char *date_end;
645         timestamp_t date;
646
647         ident_line = find_commit_header(buffer, "author", &ident_len);
648         if (!ident_line)
649                 goto fail_exit; /* no author line */
650         if (split_ident_line(&ident, ident_line, ident_len) ||
651             !ident.date_begin || !ident.date_end)
652                 goto fail_exit; /* malformed "author" line */
653
654         date = parse_timestamp(ident.date_begin, &date_end, 10);
655         if (date_end != ident.date_end)
656                 goto fail_exit; /* malformed date */
657         *(author_date_slab_at(author_date, commit)) = date;
658
659 fail_exit:
660         unuse_commit_buffer(commit, buffer);
661 }
662
663 static int compare_commits_by_author_date(const void *a_, const void *b_,
664                                           void *cb_data)
665 {
666         const struct commit *a = a_, *b = b_;
667         struct author_date_slab *author_date = cb_data;
668         timestamp_t a_date = *(author_date_slab_at(author_date, a));
669         timestamp_t b_date = *(author_date_slab_at(author_date, b));
670
671         /* newer commits with larger date first */
672         if (a_date < b_date)
673                 return 1;
674         else if (a_date > b_date)
675                 return -1;
676         return 0;
677 }
678
679 int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused)
680 {
681         const struct commit *a = a_, *b = b_;
682
683         /* newer commits first */
684         if (a->generation < b->generation)
685                 return 1;
686         else if (a->generation > b->generation)
687                 return -1;
688
689         /* use date as a heuristic when generations are equal */
690         if (a->date < b->date)
691                 return 1;
692         else if (a->date > b->date)
693                 return -1;
694         return 0;
695 }
696
697 int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
698 {
699         const struct commit *a = a_, *b = b_;
700         /* newer commits with larger date first */
701         if (a->date < b->date)
702                 return 1;
703         else if (a->date > b->date)
704                 return -1;
705         return 0;
706 }
707
708 /*
709  * Performs an in-place topological sort on the list supplied.
710  */
711 void sort_in_topological_order(struct commit_list **list, enum rev_sort_order sort_order)
712 {
713         struct commit_list *next, *orig = *list;
714         struct commit_list **pptr;
715         struct indegree_slab indegree;
716         struct prio_queue queue;
717         struct commit *commit;
718         struct author_date_slab author_date;
719
720         if (!orig)
721                 return;
722         *list = NULL;
723
724         init_indegree_slab(&indegree);
725         memset(&queue, '\0', sizeof(queue));
726
727         switch (sort_order) {
728         default: /* REV_SORT_IN_GRAPH_ORDER */
729                 queue.compare = NULL;
730                 break;
731         case REV_SORT_BY_COMMIT_DATE:
732                 queue.compare = compare_commits_by_commit_date;
733                 break;
734         case REV_SORT_BY_AUTHOR_DATE:
735                 init_author_date_slab(&author_date);
736                 queue.compare = compare_commits_by_author_date;
737                 queue.cb_data = &author_date;
738                 break;
739         }
740
741         /* Mark them and clear the indegree */
742         for (next = orig; next; next = next->next) {
743                 struct commit *commit = next->item;
744                 *(indegree_slab_at(&indegree, commit)) = 1;
745                 /* also record the author dates, if needed */
746                 if (sort_order == REV_SORT_BY_AUTHOR_DATE)
747                         record_author_date(&author_date, commit);
748         }
749
750         /* update the indegree */
751         for (next = orig; next; next = next->next) {
752                 struct commit_list *parents = next->item->parents;
753                 while (parents) {
754                         struct commit *parent = parents->item;
755                         int *pi = indegree_slab_at(&indegree, parent);
756
757                         if (*pi)
758                                 (*pi)++;
759                         parents = parents->next;
760                 }
761         }
762
763         /*
764          * find the tips
765          *
766          * tips are nodes not reachable from any other node in the list
767          *
768          * the tips serve as a starting set for the work queue.
769          */
770         for (next = orig; next; next = next->next) {
771                 struct commit *commit = next->item;
772
773                 if (*(indegree_slab_at(&indegree, commit)) == 1)
774                         prio_queue_put(&queue, commit);
775         }
776
777         /*
778          * This is unfortunate; the initial tips need to be shown
779          * in the order given from the revision traversal machinery.
780          */
781         if (sort_order == REV_SORT_IN_GRAPH_ORDER)
782                 prio_queue_reverse(&queue);
783
784         /* We no longer need the commit list */
785         free_commit_list(orig);
786
787         pptr = list;
788         *list = NULL;
789         while ((commit = prio_queue_get(&queue)) != NULL) {
790                 struct commit_list *parents;
791
792                 for (parents = commit->parents; parents ; parents = parents->next) {
793                         struct commit *parent = parents->item;
794                         int *pi = indegree_slab_at(&indegree, parent);
795
796                         if (!*pi)
797                                 continue;
798
799                         /*
800                          * parents are only enqueued for emission
801                          * when all their children have been emitted thereby
802                          * guaranteeing topological order.
803                          */
804                         if (--(*pi) == 1)
805                                 prio_queue_put(&queue, parent);
806                 }
807                 /*
808                  * all children of commit have already been
809                  * emitted. we can emit it now.
810                  */
811                 *(indegree_slab_at(&indegree, commit)) = 0;
812
813                 pptr = &commit_list_insert(commit, pptr)->next;
814         }
815
816         clear_indegree_slab(&indegree);
817         clear_prio_queue(&queue);
818         if (sort_order == REV_SORT_BY_AUTHOR_DATE)
819                 clear_author_date_slab(&author_date);
820 }
821
822 /* merge-base stuff */
823
824 /* Remember to update object flag allocation in object.h */
825 #define PARENT1         (1u<<16)
826 #define PARENT2         (1u<<17)
827 #define STALE           (1u<<18)
828 #define RESULT          (1u<<19)
829
830 static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
831
832 static int queue_has_nonstale(struct prio_queue *queue)
833 {
834         int i;
835         for (i = 0; i < queue->nr; i++) {
836                 struct commit *commit = queue->array[i].data;
837                 if (!(commit->object.flags & STALE))
838                         return 1;
839         }
840         return 0;
841 }
842
843 /* all input commits in one and twos[] must have been parsed! */
844 static struct commit_list *paint_down_to_common(struct commit *one, int n,
845                                                 struct commit **twos,
846                                                 int min_generation)
847 {
848         struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
849         struct commit_list *result = NULL;
850         int i;
851         uint32_t last_gen = GENERATION_NUMBER_INFINITY;
852
853         one->object.flags |= PARENT1;
854         if (!n) {
855                 commit_list_append(one, &result);
856                 return result;
857         }
858         prio_queue_put(&queue, one);
859
860         for (i = 0; i < n; i++) {
861                 twos[i]->object.flags |= PARENT2;
862                 prio_queue_put(&queue, twos[i]);
863         }
864
865         while (queue_has_nonstale(&queue)) {
866                 struct commit *commit = prio_queue_get(&queue);
867                 struct commit_list *parents;
868                 int flags;
869
870                 if (commit->generation > last_gen)
871                         BUG("bad generation skip %8x > %8x at %s",
872                             commit->generation, last_gen,
873                             oid_to_hex(&commit->object.oid));
874                 last_gen = commit->generation;
875
876                 if (commit->generation < min_generation)
877                         break;
878
879                 flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
880                 if (flags == (PARENT1 | PARENT2)) {
881                         if (!(commit->object.flags & RESULT)) {
882                                 commit->object.flags |= RESULT;
883                                 commit_list_insert_by_date(commit, &result);
884                         }
885                         /* Mark parents of a found merge stale */
886                         flags |= STALE;
887                 }
888                 parents = commit->parents;
889                 while (parents) {
890                         struct commit *p = parents->item;
891                         parents = parents->next;
892                         if ((p->object.flags & flags) == flags)
893                                 continue;
894                         if (parse_commit(p))
895                                 return NULL;
896                         p->object.flags |= flags;
897                         prio_queue_put(&queue, p);
898                 }
899         }
900
901         clear_prio_queue(&queue);
902         return result;
903 }
904
905 static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
906 {
907         struct commit_list *list = NULL;
908         struct commit_list *result = NULL;
909         int i;
910
911         for (i = 0; i < n; i++) {
912                 if (one == twos[i])
913                         /*
914                          * We do not mark this even with RESULT so we do not
915                          * have to clean it up.
916                          */
917                         return commit_list_insert(one, &result);
918         }
919
920         if (parse_commit(one))
921                 return NULL;
922         for (i = 0; i < n; i++) {
923                 if (parse_commit(twos[i]))
924                         return NULL;
925         }
926
927         list = paint_down_to_common(one, n, twos, 0);
928
929         while (list) {
930                 struct commit *commit = pop_commit(&list);
931                 if (!(commit->object.flags & STALE))
932                         commit_list_insert_by_date(commit, &result);
933         }
934         return result;
935 }
936
937 struct commit_list *get_octopus_merge_bases(struct commit_list *in)
938 {
939         struct commit_list *i, *j, *k, *ret = NULL;
940
941         if (!in)
942                 return ret;
943
944         commit_list_insert(in->item, &ret);
945
946         for (i = in->next; i; i = i->next) {
947                 struct commit_list *new_commits = NULL, *end = NULL;
948
949                 for (j = ret; j; j = j->next) {
950                         struct commit_list *bases;
951                         bases = get_merge_bases(i->item, j->item);
952                         if (!new_commits)
953                                 new_commits = bases;
954                         else
955                                 end->next = bases;
956                         for (k = bases; k; k = k->next)
957                                 end = k;
958                 }
959                 ret = new_commits;
960         }
961         return ret;
962 }
963
964 static int remove_redundant(struct commit **array, int cnt)
965 {
966         /*
967          * Some commit in the array may be an ancestor of
968          * another commit.  Move such commit to the end of
969          * the array, and return the number of commits that
970          * are independent from each other.
971          */
972         struct commit **work;
973         unsigned char *redundant;
974         int *filled_index;
975         int i, j, filled;
976
977         work = xcalloc(cnt, sizeof(*work));
978         redundant = xcalloc(cnt, 1);
979         ALLOC_ARRAY(filled_index, cnt - 1);
980
981         for (i = 0; i < cnt; i++)
982                 parse_commit(array[i]);
983         for (i = 0; i < cnt; i++) {
984                 struct commit_list *common;
985                 uint32_t min_generation = array[i]->generation;
986
987                 if (redundant[i])
988                         continue;
989                 for (j = filled = 0; j < cnt; j++) {
990                         if (i == j || redundant[j])
991                                 continue;
992                         filled_index[filled] = j;
993                         work[filled++] = array[j];
994
995                         if (array[j]->generation < min_generation)
996                                 min_generation = array[j]->generation;
997                 }
998                 common = paint_down_to_common(array[i], filled, work,
999                                               min_generation);
1000                 if (array[i]->object.flags & PARENT2)
1001                         redundant[i] = 1;
1002                 for (j = 0; j < filled; j++)
1003                         if (work[j]->object.flags & PARENT1)
1004                                 redundant[filled_index[j]] = 1;
1005                 clear_commit_marks(array[i], all_flags);
1006                 clear_commit_marks_many(filled, work, all_flags);
1007                 free_commit_list(common);
1008         }
1009
1010         /* Now collect the result */
1011         COPY_ARRAY(work, array, cnt);
1012         for (i = filled = 0; i < cnt; i++)
1013                 if (!redundant[i])
1014                         array[filled++] = work[i];
1015         for (j = filled, i = 0; i < cnt; i++)
1016                 if (redundant[i])
1017                         array[j++] = work[i];
1018         free(work);
1019         free(redundant);
1020         free(filled_index);
1021         return filled;
1022 }
1023
1024 static struct commit_list *get_merge_bases_many_0(struct commit *one,
1025                                                   int n,
1026                                                   struct commit **twos,
1027                                                   int cleanup)
1028 {
1029         struct commit_list *list;
1030         struct commit **rslt;
1031         struct commit_list *result;
1032         int cnt, i;
1033
1034         result = merge_bases_many(one, n, twos);
1035         for (i = 0; i < n; i++) {
1036                 if (one == twos[i])
1037                         return result;
1038         }
1039         if (!result || !result->next) {
1040                 if (cleanup) {
1041                         clear_commit_marks(one, all_flags);
1042                         clear_commit_marks_many(n, twos, all_flags);
1043                 }
1044                 return result;
1045         }
1046
1047         /* There are more than one */
1048         cnt = commit_list_count(result);
1049         rslt = xcalloc(cnt, sizeof(*rslt));
1050         for (list = result, i = 0; list; list = list->next)
1051                 rslt[i++] = list->item;
1052         free_commit_list(result);
1053
1054         clear_commit_marks(one, all_flags);
1055         clear_commit_marks_many(n, twos, all_flags);
1056
1057         cnt = remove_redundant(rslt, cnt);
1058         result = NULL;
1059         for (i = 0; i < cnt; i++)
1060                 commit_list_insert_by_date(rslt[i], &result);
1061         free(rslt);
1062         return result;
1063 }
1064
1065 struct commit_list *get_merge_bases_many(struct commit *one,
1066                                          int n,
1067                                          struct commit **twos)
1068 {
1069         return get_merge_bases_many_0(one, n, twos, 1);
1070 }
1071
1072 struct commit_list *get_merge_bases_many_dirty(struct commit *one,
1073                                                int n,
1074                                                struct commit **twos)
1075 {
1076         return get_merge_bases_many_0(one, n, twos, 0);
1077 }
1078
1079 struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
1080 {
1081         return get_merge_bases_many_0(one, 1, &two, 1);
1082 }
1083
1084 /*
1085  * Is "commit" a descendant of one of the elements on the "with_commit" list?
1086  */
1087 int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
1088 {
1089         if (!with_commit)
1090                 return 1;
1091         while (with_commit) {
1092                 struct commit *other;
1093
1094                 other = with_commit->item;
1095                 with_commit = with_commit->next;
1096                 if (in_merge_bases(other, commit))
1097                         return 1;
1098         }
1099         return 0;
1100 }
1101
1102 /*
1103  * Is "commit" an ancestor of one of the "references"?
1104  */
1105 int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
1106 {
1107         struct commit_list *bases;
1108         int ret = 0, i;
1109         uint32_t min_generation = GENERATION_NUMBER_INFINITY;
1110
1111         if (parse_commit(commit))
1112                 return ret;
1113         for (i = 0; i < nr_reference; i++) {
1114                 if (parse_commit(reference[i]))
1115                         return ret;
1116                 if (reference[i]->generation < min_generation)
1117                         min_generation = reference[i]->generation;
1118         }
1119
1120         if (commit->generation > min_generation)
1121                 return ret;
1122
1123         bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
1124         if (commit->object.flags & PARENT2)
1125                 ret = 1;
1126         clear_commit_marks(commit, all_flags);
1127         clear_commit_marks_many(nr_reference, reference, all_flags);
1128         free_commit_list(bases);
1129         return ret;
1130 }
1131
1132 /*
1133  * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
1134  */
1135 int in_merge_bases(struct commit *commit, struct commit *reference)
1136 {
1137         return in_merge_bases_many(commit, 1, &reference);
1138 }
1139
1140 struct commit_list *reduce_heads(struct commit_list *heads)
1141 {
1142         struct commit_list *p;
1143         struct commit_list *result = NULL, **tail = &result;
1144         struct commit **array;
1145         int num_head, i;
1146
1147         if (!heads)
1148                 return NULL;
1149
1150         /* Uniquify */
1151         for (p = heads; p; p = p->next)
1152                 p->item->object.flags &= ~STALE;
1153         for (p = heads, num_head = 0; p; p = p->next) {
1154                 if (p->item->object.flags & STALE)
1155                         continue;
1156                 p->item->object.flags |= STALE;
1157                 num_head++;
1158         }
1159         array = xcalloc(num_head, sizeof(*array));
1160         for (p = heads, i = 0; p; p = p->next) {
1161                 if (p->item->object.flags & STALE) {
1162                         array[i++] = p->item;
1163                         p->item->object.flags &= ~STALE;
1164                 }
1165         }
1166         num_head = remove_redundant(array, num_head);
1167         for (i = 0; i < num_head; i++)
1168                 tail = &commit_list_insert(array[i], tail)->next;
1169         free(array);
1170         return result;
1171 }
1172
1173 void reduce_heads_replace(struct commit_list **heads)
1174 {
1175         struct commit_list *result = reduce_heads(*heads);
1176         free_commit_list(*heads);
1177         *heads = result;
1178 }
1179
1180 static const char gpg_sig_header[] = "gpgsig";
1181 static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
1182
1183 static int do_sign_commit(struct strbuf *buf, const char *keyid)
1184 {
1185         struct strbuf sig = STRBUF_INIT;
1186         int inspos, copypos;
1187         const char *eoh;
1188
1189         /* find the end of the header */
1190         eoh = strstr(buf->buf, "\n\n");
1191         if (!eoh)
1192                 inspos = buf->len;
1193         else
1194                 inspos = eoh - buf->buf + 1;
1195
1196         if (!keyid || !*keyid)
1197                 keyid = get_signing_key();
1198         if (sign_buffer(buf, &sig, keyid)) {
1199                 strbuf_release(&sig);
1200                 return -1;
1201         }
1202
1203         for (copypos = 0; sig.buf[copypos]; ) {
1204                 const char *bol = sig.buf + copypos;
1205                 const char *eol = strchrnul(bol, '\n');
1206                 int len = (eol - bol) + !!*eol;
1207
1208                 if (!copypos) {
1209                         strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len);
1210                         inspos += gpg_sig_header_len;
1211                 }
1212                 strbuf_insert(buf, inspos++, " ", 1);
1213                 strbuf_insert(buf, inspos, bol, len);
1214                 inspos += len;
1215                 copypos += len;
1216         }
1217         strbuf_release(&sig);
1218         return 0;
1219 }
1220
1221 int parse_signed_commit(const struct commit *commit,
1222                         struct strbuf *payload, struct strbuf *signature)
1223 {
1224
1225         unsigned long size;
1226         const char *buffer = get_commit_buffer(commit, &size);
1227         int in_signature, saw_signature = -1;
1228         const char *line, *tail;
1229
1230         line = buffer;
1231         tail = buffer + size;
1232         in_signature = 0;
1233         saw_signature = 0;
1234         while (line < tail) {
1235                 const char *sig = NULL;
1236                 const char *next = memchr(line, '\n', tail - line);
1237
1238                 next = next ? next + 1 : tail;
1239                 if (in_signature && line[0] == ' ')
1240                         sig = line + 1;
1241                 else if (starts_with(line, gpg_sig_header) &&
1242                          line[gpg_sig_header_len] == ' ')
1243                         sig = line + gpg_sig_header_len + 1;
1244                 if (sig) {
1245                         strbuf_add(signature, sig, next - sig);
1246                         saw_signature = 1;
1247                         in_signature = 1;
1248                 } else {
1249                         if (*line == '\n')
1250                                 /* dump the whole remainder of the buffer */
1251                                 next = tail;
1252                         strbuf_add(payload, line, next - line);
1253                         in_signature = 0;
1254                 }
1255                 line = next;
1256         }
1257         unuse_commit_buffer(commit, buffer);
1258         return saw_signature;
1259 }
1260
1261 int remove_signature(struct strbuf *buf)
1262 {
1263         const char *line = buf->buf;
1264         const char *tail = buf->buf + buf->len;
1265         int in_signature = 0;
1266         const char *sig_start = NULL;
1267         const char *sig_end = NULL;
1268
1269         while (line < tail) {
1270                 const char *next = memchr(line, '\n', tail - line);
1271                 next = next ? next + 1 : tail;
1272
1273                 if (in_signature && line[0] == ' ')
1274                         sig_end = next;
1275                 else if (starts_with(line, gpg_sig_header) &&
1276                          line[gpg_sig_header_len] == ' ') {
1277                         sig_start = line;
1278                         sig_end = next;
1279                         in_signature = 1;
1280                 } else {
1281                         if (*line == '\n')
1282                                 /* dump the whole remainder of the buffer */
1283                                 next = tail;
1284                         in_signature = 0;
1285                 }
1286                 line = next;
1287         }
1288
1289         if (sig_start)
1290                 strbuf_remove(buf, sig_start - buf->buf, sig_end - sig_start);
1291
1292         return sig_start != NULL;
1293 }
1294
1295 static void handle_signed_tag(struct commit *parent, struct commit_extra_header ***tail)
1296 {
1297         struct merge_remote_desc *desc;
1298         struct commit_extra_header *mergetag;
1299         char *buf;
1300         unsigned long size, len;
1301         enum object_type type;
1302
1303         desc = merge_remote_util(parent);
1304         if (!desc || !desc->obj)
1305                 return;
1306         buf = read_object_file(&desc->obj->oid, &type, &size);
1307         if (!buf || type != OBJ_TAG)
1308                 goto free_return;
1309         len = parse_signature(buf, size);
1310         if (size == len)
1311                 goto free_return;
1312         /*
1313          * We could verify this signature and either omit the tag when
1314          * it does not validate, but the integrator may not have the
1315          * public key of the signer of the tag he is merging, while a
1316          * later auditor may have it while auditing, so let's not run
1317          * verify-signed-buffer here for now...
1318          *
1319          * if (verify_signed_buffer(buf, len, buf + len, size - len, ...))
1320          *      warn("warning: signed tag unverified.");
1321          */
1322         mergetag = xcalloc(1, sizeof(*mergetag));
1323         mergetag->key = xstrdup("mergetag");
1324         mergetag->value = buf;
1325         mergetag->len = size;
1326
1327         **tail = mergetag;
1328         *tail = &mergetag->next;
1329         return;
1330
1331 free_return:
1332         free(buf);
1333 }
1334
1335 int check_commit_signature(const struct commit *commit, struct signature_check *sigc)
1336 {
1337         struct strbuf payload = STRBUF_INIT;
1338         struct strbuf signature = STRBUF_INIT;
1339         int ret = 1;
1340
1341         sigc->result = 'N';
1342
1343         if (parse_signed_commit(commit, &payload, &signature) <= 0)
1344                 goto out;
1345         ret = check_signature(payload.buf, payload.len, signature.buf,
1346                 signature.len, sigc);
1347
1348  out:
1349         strbuf_release(&payload);
1350         strbuf_release(&signature);
1351
1352         return ret;
1353 }
1354
1355
1356
1357 void append_merge_tag_headers(struct commit_list *parents,
1358                               struct commit_extra_header ***tail)
1359 {
1360         while (parents) {
1361                 struct commit *parent = parents->item;
1362                 handle_signed_tag(parent, tail);
1363                 parents = parents->next;
1364         }
1365 }
1366
1367 static void add_extra_header(struct strbuf *buffer,
1368                              struct commit_extra_header *extra)
1369 {
1370         strbuf_addstr(buffer, extra->key);
1371         if (extra->len)
1372                 strbuf_add_lines(buffer, " ", extra->value, extra->len);
1373         else
1374                 strbuf_addch(buffer, '\n');
1375 }
1376
1377 struct commit_extra_header *read_commit_extra_headers(struct commit *commit,
1378                                                       const char **exclude)
1379 {
1380         struct commit_extra_header *extra = NULL;
1381         unsigned long size;
1382         const char *buffer = get_commit_buffer(commit, &size);
1383         extra = read_commit_extra_header_lines(buffer, size, exclude);
1384         unuse_commit_buffer(commit, buffer);
1385         return extra;
1386 }
1387
1388 int for_each_mergetag(each_mergetag_fn fn, struct commit *commit, void *data)
1389 {
1390         struct commit_extra_header *extra, *to_free;
1391         int res = 0;
1392
1393         to_free = read_commit_extra_headers(commit, NULL);
1394         for (extra = to_free; !res && extra; extra = extra->next) {
1395                 if (strcmp(extra->key, "mergetag"))
1396                         continue; /* not a merge tag */
1397                 res = fn(commit, extra, data);
1398         }
1399         free_commit_extra_headers(to_free);
1400         return res;
1401 }
1402
1403 static inline int standard_header_field(const char *field, size_t len)
1404 {
1405         return ((len == 4 && !memcmp(field, "tree", 4)) ||
1406                 (len == 6 && !memcmp(field, "parent", 6)) ||
1407                 (len == 6 && !memcmp(field, "author", 6)) ||
1408                 (len == 9 && !memcmp(field, "committer", 9)) ||
1409                 (len == 8 && !memcmp(field, "encoding", 8)));
1410 }
1411
1412 static int excluded_header_field(const char *field, size_t len, const char **exclude)
1413 {
1414         if (!exclude)
1415                 return 0;
1416
1417         while (*exclude) {
1418                 size_t xlen = strlen(*exclude);
1419                 if (len == xlen && !memcmp(field, *exclude, xlen))
1420                         return 1;
1421                 exclude++;
1422         }
1423         return 0;
1424 }
1425
1426 static struct commit_extra_header *read_commit_extra_header_lines(
1427         const char *buffer, size_t size,
1428         const char **exclude)
1429 {
1430         struct commit_extra_header *extra = NULL, **tail = &extra, *it = NULL;
1431         const char *line, *next, *eof, *eob;
1432         struct strbuf buf = STRBUF_INIT;
1433
1434         for (line = buffer, eob = line + size;
1435              line < eob && *line != '\n';
1436              line = next) {
1437                 next = memchr(line, '\n', eob - line);
1438                 next = next ? next + 1 : eob;
1439                 if (*line == ' ') {
1440                         /* continuation */
1441                         if (it)
1442                                 strbuf_add(&buf, line + 1, next - (line + 1));
1443                         continue;
1444                 }
1445                 if (it)
1446                         it->value = strbuf_detach(&buf, &it->len);
1447                 strbuf_reset(&buf);
1448                 it = NULL;
1449
1450                 eof = memchr(line, ' ', next - line);
1451                 if (!eof)
1452                         eof = next;
1453                 else if (standard_header_field(line, eof - line) ||
1454                          excluded_header_field(line, eof - line, exclude))
1455                         continue;
1456
1457                 it = xcalloc(1, sizeof(*it));
1458                 it->key = xmemdupz(line, eof-line);
1459                 *tail = it;
1460                 tail = &it->next;
1461                 if (eof + 1 < next)
1462                         strbuf_add(&buf, eof + 1, next - (eof + 1));
1463         }
1464         if (it)
1465                 it->value = strbuf_detach(&buf, &it->len);
1466         return extra;
1467 }
1468
1469 void free_commit_extra_headers(struct commit_extra_header *extra)
1470 {
1471         while (extra) {
1472                 struct commit_extra_header *next = extra->next;
1473                 free(extra->key);
1474                 free(extra->value);
1475                 free(extra);
1476                 extra = next;
1477         }
1478 }
1479
1480 int commit_tree(const char *msg, size_t msg_len, const struct object_id *tree,
1481                 struct commit_list *parents, struct object_id *ret,
1482                 const char *author, const char *sign_commit)
1483 {
1484         struct commit_extra_header *extra = NULL, **tail = &extra;
1485         int result;
1486
1487         append_merge_tag_headers(parents, &tail);
1488         result = commit_tree_extended(msg, msg_len, tree, parents, ret,
1489                                       author, sign_commit, extra);
1490         free_commit_extra_headers(extra);
1491         return result;
1492 }
1493
1494 static int find_invalid_utf8(const char *buf, int len)
1495 {
1496         int offset = 0;
1497         static const unsigned int max_codepoint[] = {
1498                 0x7f, 0x7ff, 0xffff, 0x10ffff
1499         };
1500
1501         while (len) {
1502                 unsigned char c = *buf++;
1503                 int bytes, bad_offset;
1504                 unsigned int codepoint;
1505                 unsigned int min_val, max_val;
1506
1507                 len--;
1508                 offset++;
1509
1510                 /* Simple US-ASCII? No worries. */
1511                 if (c < 0x80)
1512                         continue;
1513
1514                 bad_offset = offset-1;
1515
1516                 /*
1517                  * Count how many more high bits set: that's how
1518                  * many more bytes this sequence should have.
1519                  */
1520                 bytes = 0;
1521                 while (c & 0x40) {
1522                         c <<= 1;
1523                         bytes++;
1524                 }
1525
1526                 /*
1527                  * Must be between 1 and 3 more bytes.  Longer sequences result in
1528                  * codepoints beyond U+10FFFF, which are guaranteed never to exist.
1529                  */
1530                 if (bytes < 1 || 3 < bytes)
1531                         return bad_offset;
1532
1533                 /* Do we *have* that many bytes? */
1534                 if (len < bytes)
1535                         return bad_offset;
1536
1537                 /*
1538                  * Place the encoded bits at the bottom of the value and compute the
1539                  * valid range.
1540                  */
1541                 codepoint = (c & 0x7f) >> bytes;
1542                 min_val = max_codepoint[bytes-1] + 1;
1543                 max_val = max_codepoint[bytes];
1544
1545                 offset += bytes;
1546                 len -= bytes;
1547
1548                 /* And verify that they are good continuation bytes */
1549                 do {
1550                         codepoint <<= 6;
1551                         codepoint |= *buf & 0x3f;
1552                         if ((*buf++ & 0xc0) != 0x80)
1553                                 return bad_offset;
1554                 } while (--bytes);
1555
1556                 /* Reject codepoints that are out of range for the sequence length. */
1557                 if (codepoint < min_val || codepoint > max_val)
1558                         return bad_offset;
1559                 /* Surrogates are only for UTF-16 and cannot be encoded in UTF-8. */
1560                 if ((codepoint & 0x1ff800) == 0xd800)
1561                         return bad_offset;
1562                 /* U+xxFFFE and U+xxFFFF are guaranteed non-characters. */
1563                 if ((codepoint & 0xfffe) == 0xfffe)
1564                         return bad_offset;
1565                 /* So are anything in the range U+FDD0..U+FDEF. */
1566                 if (codepoint >= 0xfdd0 && codepoint <= 0xfdef)
1567                         return bad_offset;
1568         }
1569         return -1;
1570 }
1571
1572 /*
1573  * This verifies that the buffer is in proper utf8 format.
1574  *
1575  * If it isn't, it assumes any non-utf8 characters are Latin1,
1576  * and does the conversion.
1577  */
1578 static int verify_utf8(struct strbuf *buf)
1579 {
1580         int ok = 1;
1581         long pos = 0;
1582
1583         for (;;) {
1584                 int bad;
1585                 unsigned char c;
1586                 unsigned char replace[2];
1587
1588                 bad = find_invalid_utf8(buf->buf + pos, buf->len - pos);
1589                 if (bad < 0)
1590                         return ok;
1591                 pos += bad;
1592                 ok = 0;
1593                 c = buf->buf[pos];
1594                 strbuf_remove(buf, pos, 1);
1595
1596                 /* We know 'c' must be in the range 128-255 */
1597                 replace[0] = 0xc0 + (c >> 6);
1598                 replace[1] = 0x80 + (c & 0x3f);
1599                 strbuf_insert(buf, pos, replace, 2);
1600                 pos += 2;
1601         }
1602 }
1603
1604 static const char commit_utf8_warn[] =
1605 N_("Warning: commit message did not conform to UTF-8.\n"
1606    "You may want to amend it after fixing the message, or set the config\n"
1607    "variable i18n.commitencoding to the encoding your project uses.\n");
1608
1609 int commit_tree_extended(const char *msg, size_t msg_len,
1610                          const struct object_id *tree,
1611                          struct commit_list *parents, struct object_id *ret,
1612                          const char *author, const char *sign_commit,
1613                          struct commit_extra_header *extra)
1614 {
1615         int result;
1616         int encoding_is_utf8;
1617         struct strbuf buffer;
1618
1619         assert_oid_type(tree, OBJ_TREE);
1620
1621         if (memchr(msg, '\0', msg_len))
1622                 return error("a NUL byte in commit log message not allowed.");
1623
1624         /* Not having i18n.commitencoding is the same as having utf-8 */
1625         encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);
1626
1627         strbuf_init(&buffer, 8192); /* should avoid reallocs for the headers */
1628         strbuf_addf(&buffer, "tree %s\n", oid_to_hex(tree));
1629
1630         /*
1631          * NOTE! This ordering means that the same exact tree merged with a
1632          * different order of parents will be a _different_ changeset even
1633          * if everything else stays the same.
1634          */
1635         while (parents) {
1636                 struct commit *parent = pop_commit(&parents);
1637                 strbuf_addf(&buffer, "parent %s\n",
1638                             oid_to_hex(&parent->object.oid));
1639         }
1640
1641         /* Person/date information */
1642         if (!author)
1643                 author = git_author_info(IDENT_STRICT);
1644         strbuf_addf(&buffer, "author %s\n", author);
1645         strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_STRICT));
1646         if (!encoding_is_utf8)
1647                 strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding);
1648
1649         while (extra) {
1650                 add_extra_header(&buffer, extra);
1651                 extra = extra->next;
1652         }
1653         strbuf_addch(&buffer, '\n');
1654
1655         /* And add the comment */
1656         strbuf_add(&buffer, msg, msg_len);
1657
1658         /* And check the encoding */
1659         if (encoding_is_utf8 && !verify_utf8(&buffer))
1660                 fprintf(stderr, _(commit_utf8_warn));
1661
1662         if (sign_commit && do_sign_commit(&buffer, sign_commit)) {
1663                 result = -1;
1664                 goto out;
1665         }
1666
1667         result = write_object_file(buffer.buf, buffer.len, commit_type, ret);
1668 out:
1669         strbuf_release(&buffer);
1670         return result;
1671 }
1672
1673 define_commit_slab(merge_desc_slab, struct merge_remote_desc *);
1674 static struct merge_desc_slab merge_desc_slab = COMMIT_SLAB_INIT(1, merge_desc_slab);
1675
1676 struct merge_remote_desc *merge_remote_util(struct commit *commit)
1677 {
1678         return *merge_desc_slab_at(&merge_desc_slab, commit);
1679 }
1680
1681 void set_merge_remote_desc(struct commit *commit,
1682                            const char *name, struct object *obj)
1683 {
1684         struct merge_remote_desc *desc;
1685         FLEX_ALLOC_STR(desc, name, name);
1686         desc->obj = obj;
1687         *merge_desc_slab_at(&merge_desc_slab, commit) = desc;
1688 }
1689
1690 struct commit *get_merge_parent(const char *name)
1691 {
1692         struct object *obj;
1693         struct commit *commit;
1694         struct object_id oid;
1695         if (get_oid(name, &oid))
1696                 return NULL;
1697         obj = parse_object(the_repository, &oid);
1698         commit = (struct commit *)peel_to_type(name, 0, obj, OBJ_COMMIT);
1699         if (commit && !merge_remote_util(commit))
1700                 set_merge_remote_desc(commit, name, obj);
1701         return commit;
1702 }
1703
1704 /*
1705  * Append a commit to the end of the commit_list.
1706  *
1707  * next starts by pointing to the variable that holds the head of an
1708  * empty commit_list, and is updated to point to the "next" field of
1709  * the last item on the list as new commits are appended.
1710  *
1711  * Usage example:
1712  *
1713  *     struct commit_list *list;
1714  *     struct commit_list **next = &list;
1715  *
1716  *     next = commit_list_append(c1, next);
1717  *     next = commit_list_append(c2, next);
1718  *     assert(commit_list_count(list) == 2);
1719  *     return list;
1720  */
1721 struct commit_list **commit_list_append(struct commit *commit,
1722                                         struct commit_list **next)
1723 {
1724         struct commit_list *new_commit = xmalloc(sizeof(struct commit_list));
1725         new_commit->item = commit;
1726         *next = new_commit;
1727         new_commit->next = NULL;
1728         return &new_commit->next;
1729 }
1730
1731 const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
1732 {
1733         int key_len = strlen(key);
1734         const char *line = msg;
1735
1736         while (line) {
1737                 const char *eol = strchrnul(line, '\n');
1738
1739                 if (line == eol)
1740                         return NULL;
1741
1742                 if (eol - line > key_len &&
1743                     !strncmp(line, key, key_len) &&
1744                     line[key_len] == ' ') {
1745                         *out_len = eol - line - key_len - 1;
1746                         return line + key_len + 1;
1747                 }
1748                 line = *eol ? eol + 1 : NULL;
1749         }
1750         return NULL;
1751 }
1752
1753 /*
1754  * Inspect the given string and determine the true "end" of the log message, in
1755  * order to find where to put a new Signed-off-by: line.  Ignored are
1756  * trailing comment lines and blank lines.  To support "git commit -s
1757  * --amend" on an existing commit, we also ignore "Conflicts:".  To
1758  * support "git commit -v", we truncate at cut lines.
1759  *
1760  * Returns the number of bytes from the tail to ignore, to be fed as
1761  * the second parameter to append_signoff().
1762  */
1763 int ignore_non_trailer(const char *buf, size_t len)
1764 {
1765         int boc = 0;
1766         int bol = 0;
1767         int in_old_conflicts_block = 0;
1768         size_t cutoff = wt_status_locate_end(buf, len);
1769
1770         while (bol < cutoff) {
1771                 const char *next_line = memchr(buf + bol, '\n', len - bol);
1772
1773                 if (!next_line)
1774                         next_line = buf + len;
1775                 else
1776                         next_line++;
1777
1778                 if (buf[bol] == comment_line_char || buf[bol] == '\n') {
1779                         /* is this the first of the run of comments? */
1780                         if (!boc)
1781                                 boc = bol;
1782                         /* otherwise, it is just continuing */
1783                 } else if (starts_with(buf + bol, "Conflicts:\n")) {
1784                         in_old_conflicts_block = 1;
1785                         if (!boc)
1786                                 boc = bol;
1787                 } else if (in_old_conflicts_block && buf[bol] == '\t') {
1788                         ; /* a pathname in the conflicts block */
1789                 } else if (boc) {
1790                         /* the previous was not trailing comment */
1791                         boc = 0;
1792                         in_old_conflicts_block = 0;
1793                 }
1794                 bol = next_line - buf;
1795         }
1796         return boc ? len - boc : len - cutoff;
1797 }