object: add repository argument to lookup_object
[git] / commit.c
1 #include "cache.h"
2 #include "tag.h"
3 #include "commit.h"
4 #include "commit-graph.h"
5 #include "repository.h"
6 #include "object-store.h"
7 #include "pkt-line.h"
8 #include "utf8.h"
9 #include "diff.h"
10 #include "revision.h"
11 #include "notes.h"
12 #include "alloc.h"
13 #include "gpg-interface.h"
14 #include "mergesort.h"
15 #include "commit-slab.h"
16 #include "prio-queue.h"
17 #include "sha1-lookup.h"
18 #include "wt-status.h"
19 #include "advice.h"
20
21 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
22
23 int save_commit_buffer = 1;
24
25 const char *commit_type = "commit";
26
27 struct commit *lookup_commit_reference_gently(const struct object_id *oid,
28                                               int quiet)
29 {
30         struct object *obj = deref_tag(parse_object(the_repository, oid),
31                                        NULL, 0);
32
33         if (!obj)
34                 return NULL;
35         return object_as_type(obj, OBJ_COMMIT, quiet);
36 }
37
38 struct commit *lookup_commit_reference(const struct object_id *oid)
39 {
40         return lookup_commit_reference_gently(oid, 0);
41 }
42
43 struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name)
44 {
45         struct commit *c = lookup_commit_reference(oid);
46         if (!c)
47                 die(_("could not parse %s"), ref_name);
48         if (oidcmp(oid, &c->object.oid)) {
49                 warning(_("%s %s is not a commit!"),
50                         ref_name, oid_to_hex(oid));
51         }
52         return c;
53 }
54
55 struct commit *lookup_commit(const struct object_id *oid)
56 {
57         struct object *obj = lookup_object(the_repository, oid->hash);
58         if (!obj)
59                 return create_object(the_repository, oid->hash,
60                                      alloc_commit_node(the_repository));
61         return object_as_type(obj, OBJ_COMMIT, 0);
62 }
63
64 struct commit *lookup_commit_reference_by_name(const char *name)
65 {
66         struct object_id oid;
67         struct commit *commit;
68
69         if (get_oid_committish(name, &oid))
70                 return NULL;
71         commit = lookup_commit_reference(&oid);
72         if (parse_commit(commit))
73                 return NULL;
74         return commit;
75 }
76
77 static timestamp_t parse_commit_date(const char *buf, const char *tail)
78 {
79         const char *dateptr;
80
81         if (buf + 6 >= tail)
82                 return 0;
83         if (memcmp(buf, "author", 6))
84                 return 0;
85         while (buf < tail && *buf++ != '\n')
86                 /* nada */;
87         if (buf + 9 >= tail)
88                 return 0;
89         if (memcmp(buf, "committer", 9))
90                 return 0;
91         while (buf < tail && *buf++ != '>')
92                 /* nada */;
93         if (buf >= tail)
94                 return 0;
95         dateptr = buf;
96         while (buf < tail && *buf++ != '\n')
97                 /* nada */;
98         if (buf >= tail)
99                 return 0;
100         /* dateptr < buf && buf[-1] == '\n', so parsing will stop at buf-1 */
101         return parse_timestamp(dateptr, NULL, 10);
102 }
103
104 static const unsigned char *commit_graft_sha1_access(size_t index, void *table)
105 {
106         struct commit_graft **commit_graft_table = table;
107         return commit_graft_table[index]->oid.hash;
108 }
109
110 static int commit_graft_pos(struct repository *r, const unsigned char *sha1)
111 {
112         return sha1_pos(sha1, r->parsed_objects->grafts,
113                         r->parsed_objects->grafts_nr,
114                         commit_graft_sha1_access);
115 }
116
117 int register_commit_graft(struct repository *r, struct commit_graft *graft,
118                           int ignore_dups)
119 {
120         int pos = commit_graft_pos(r, graft->oid.hash);
121
122         if (0 <= pos) {
123                 if (ignore_dups)
124                         free(graft);
125                 else {
126                         free(r->parsed_objects->grafts[pos]);
127                         r->parsed_objects->grafts[pos] = graft;
128                 }
129                 return 1;
130         }
131         pos = -pos - 1;
132         ALLOC_GROW(r->parsed_objects->grafts,
133                    r->parsed_objects->grafts_nr + 1,
134                    r->parsed_objects->grafts_alloc);
135         r->parsed_objects->grafts_nr++;
136         if (pos < r->parsed_objects->grafts_nr)
137                 memmove(r->parsed_objects->grafts + pos + 1,
138                         r->parsed_objects->grafts + pos,
139                         (r->parsed_objects->grafts_nr - pos - 1) *
140                         sizeof(*r->parsed_objects->grafts));
141         r->parsed_objects->grafts[pos] = graft;
142         return 0;
143 }
144
145 struct commit_graft *read_graft_line(struct strbuf *line)
146 {
147         /* The format is just "Commit Parent1 Parent2 ...\n" */
148         int i, phase;
149         const char *tail = NULL;
150         struct commit_graft *graft = NULL;
151         struct object_id dummy_oid, *oid;
152
153         strbuf_rtrim(line);
154         if (!line->len || line->buf[0] == '#')
155                 return NULL;
156         /*
157          * phase 0 verifies line, counts hashes in line and allocates graft
158          * phase 1 fills graft
159          */
160         for (phase = 0; phase < 2; phase++) {
161                 oid = graft ? &graft->oid : &dummy_oid;
162                 if (parse_oid_hex(line->buf, oid, &tail))
163                         goto bad_graft_data;
164                 for (i = 0; *tail != '\0'; i++) {
165                         oid = graft ? &graft->parent[i] : &dummy_oid;
166                         if (!isspace(*tail++) || parse_oid_hex(tail, oid, &tail))
167                                 goto bad_graft_data;
168                 }
169                 if (!graft) {
170                         graft = xmalloc(st_add(sizeof(*graft),
171                                                st_mult(sizeof(struct object_id), i)));
172                         graft->nr_parent = i;
173                 }
174         }
175         return graft;
176
177 bad_graft_data:
178         error("bad graft data: %s", line->buf);
179         assert(!graft);
180         return NULL;
181 }
182
183 static int read_graft_file(struct repository *r, const char *graft_file)
184 {
185         FILE *fp = fopen_or_warn(graft_file, "r");
186         struct strbuf buf = STRBUF_INIT;
187         if (!fp)
188                 return -1;
189         if (advice_graft_file_deprecated)
190                 advise(_("Support for <GIT_DIR>/info/grafts is deprecated\n"
191                          "and will be removed in a future Git version.\n"
192                          "\n"
193                          "Please use \"git replace --convert-graft-file\"\n"
194                          "to convert the grafts into replace refs.\n"
195                          "\n"
196                          "Turn this message off by running\n"
197                          "\"git config advice.graftFileDeprecated false\""));
198         while (!strbuf_getwholeline(&buf, fp, '\n')) {
199                 /* The format is just "Commit Parent1 Parent2 ...\n" */
200                 struct commit_graft *graft = read_graft_line(&buf);
201                 if (!graft)
202                         continue;
203                 if (register_commit_graft(r, graft, 1))
204                         error("duplicate graft data: %s", buf.buf);
205         }
206         fclose(fp);
207         strbuf_release(&buf);
208         return 0;
209 }
210
211 static void prepare_commit_graft(struct repository *r)
212 {
213         char *graft_file;
214
215         if (r->parsed_objects->commit_graft_prepared)
216                 return;
217         if (!startup_info->have_repository)
218                 return;
219
220         graft_file = get_graft_file(r);
221         read_graft_file(r, graft_file);
222         /* make sure shallows are read */
223         is_repository_shallow(r);
224         r->parsed_objects->commit_graft_prepared = 1;
225 }
226
227 struct commit_graft *lookup_commit_graft(struct repository *r, const struct object_id *oid)
228 {
229         int pos;
230         prepare_commit_graft(r);
231         pos = commit_graft_pos(r, oid->hash);
232         if (pos < 0)
233                 return NULL;
234         return r->parsed_objects->grafts[pos];
235 }
236
237 int for_each_commit_graft(each_commit_graft_fn fn, void *cb_data)
238 {
239         int i, ret;
240         for (i = ret = 0; i < the_repository->parsed_objects->grafts_nr && !ret; i++)
241                 ret = fn(the_repository->parsed_objects->grafts[i], cb_data);
242         return ret;
243 }
244
245 int unregister_shallow(const struct object_id *oid)
246 {
247         int pos = commit_graft_pos(the_repository, oid->hash);
248         if (pos < 0)
249                 return -1;
250         if (pos + 1 < the_repository->parsed_objects->grafts_nr)
251                 MOVE_ARRAY(the_repository->parsed_objects->grafts + pos,
252                            the_repository->parsed_objects->grafts + pos + 1,
253                            the_repository->parsed_objects->grafts_nr - pos - 1);
254         the_repository->parsed_objects->grafts_nr--;
255         return 0;
256 }
257
258 struct commit_buffer {
259         void *buffer;
260         unsigned long size;
261 };
262 define_commit_slab(buffer_slab, struct commit_buffer);
263 static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab);
264
265 void set_commit_buffer(struct commit *commit, void *buffer, unsigned long size)
266 {
267         struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit);
268         v->buffer = buffer;
269         v->size = size;
270 }
271
272 const void *get_cached_commit_buffer(const struct commit *commit, unsigned long *sizep)
273 {
274         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
275         if (!v) {
276                 if (sizep)
277                         *sizep = 0;
278                 return NULL;
279         }
280         if (sizep)
281                 *sizep = v->size;
282         return v->buffer;
283 }
284
285 const void *get_commit_buffer(const struct commit *commit, unsigned long *sizep)
286 {
287         const void *ret = get_cached_commit_buffer(commit, sizep);
288         if (!ret) {
289                 enum object_type type;
290                 unsigned long size;
291                 ret = read_object_file(&commit->object.oid, &type, &size);
292                 if (!ret)
293                         die("cannot read commit object %s",
294                             oid_to_hex(&commit->object.oid));
295                 if (type != OBJ_COMMIT)
296                         die("expected commit for %s, got %s",
297                             oid_to_hex(&commit->object.oid), type_name(type));
298                 if (sizep)
299                         *sizep = size;
300         }
301         return ret;
302 }
303
304 void unuse_commit_buffer(const struct commit *commit, const void *buffer)
305 {
306         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
307         if (!(v && v->buffer == buffer))
308                 free((void *)buffer);
309 }
310
311 void free_commit_buffer(struct commit *commit)
312 {
313         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
314         if (v) {
315                 FREE_AND_NULL(v->buffer);
316                 v->size = 0;
317         }
318 }
319
320 struct tree *get_commit_tree(const struct commit *commit)
321 {
322         if (commit->maybe_tree || !commit->object.parsed)
323                 return commit->maybe_tree;
324
325         if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH)
326                 BUG("commit has NULL tree, but was not loaded from commit-graph");
327
328         return get_commit_tree_in_graph(commit);
329 }
330
331 struct object_id *get_commit_tree_oid(const struct commit *commit)
332 {
333         return &get_commit_tree(commit)->object.oid;
334 }
335
336 void release_commit_memory(struct commit *c)
337 {
338         c->maybe_tree = NULL;
339         c->index = 0;
340         free_commit_buffer(c);
341         free_commit_list(c->parents);
342         /* TODO: what about commit->util? */
343
344         c->object.parsed = 0;
345 }
346
347 const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep)
348 {
349         struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit);
350         void *ret;
351
352         if (!v) {
353                 if (sizep)
354                         *sizep = 0;
355                 return NULL;
356         }
357         ret = v->buffer;
358         if (sizep)
359                 *sizep = v->size;
360
361         v->buffer = NULL;
362         v->size = 0;
363         return ret;
364 }
365
366 int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph)
367 {
368         const char *tail = buffer;
369         const char *bufptr = buffer;
370         struct object_id parent;
371         struct commit_list **pptr;
372         struct commit_graft *graft;
373         const int tree_entry_len = GIT_SHA1_HEXSZ + 5;
374         const int parent_entry_len = GIT_SHA1_HEXSZ + 7;
375
376         if (item->object.parsed)
377                 return 0;
378         item->object.parsed = 1;
379         tail += size;
380         if (tail <= bufptr + tree_entry_len + 1 || memcmp(bufptr, "tree ", 5) ||
381                         bufptr[tree_entry_len] != '\n')
382                 return error("bogus commit object %s", oid_to_hex(&item->object.oid));
383         if (get_oid_hex(bufptr + 5, &parent) < 0)
384                 return error("bad tree pointer in commit %s",
385                              oid_to_hex(&item->object.oid));
386         item->maybe_tree = lookup_tree(&parent);
387         bufptr += tree_entry_len + 1; /* "tree " + "hex sha1" + "\n" */
388         pptr = &item->parents;
389
390         graft = lookup_commit_graft(the_repository, &item->object.oid);
391         while (bufptr + parent_entry_len < tail && !memcmp(bufptr, "parent ", 7)) {
392                 struct commit *new_parent;
393
394                 if (tail <= bufptr + parent_entry_len + 1 ||
395                     get_oid_hex(bufptr + 7, &parent) ||
396                     bufptr[parent_entry_len] != '\n')
397                         return error("bad parents in commit %s", oid_to_hex(&item->object.oid));
398                 bufptr += parent_entry_len + 1;
399                 /*
400                  * The clone is shallow if nr_parent < 0, and we must
401                  * not traverse its real parents even when we unhide them.
402                  */
403                 if (graft && (graft->nr_parent < 0 || grafts_replace_parents))
404                         continue;
405                 new_parent = lookup_commit(&parent);
406                 if (new_parent)
407                         pptr = &commit_list_insert(new_parent, pptr)->next;
408         }
409         if (graft) {
410                 int i;
411                 struct commit *new_parent;
412                 for (i = 0; i < graft->nr_parent; i++) {
413                         new_parent = lookup_commit(&graft->parent[i]);
414                         if (!new_parent)
415                                 continue;
416                         pptr = &commit_list_insert(new_parent, pptr)->next;
417                 }
418         }
419         item->date = parse_commit_date(bufptr, tail);
420
421         if (check_graph)
422                 load_commit_graph_info(item);
423
424         return 0;
425 }
426
427 int parse_commit_gently(struct commit *item, int quiet_on_missing)
428 {
429         enum object_type type;
430         void *buffer;
431         unsigned long size;
432         int ret;
433
434         if (!item)
435                 return -1;
436         if (item->object.parsed)
437                 return 0;
438         if (parse_commit_in_graph(item))
439                 return 0;
440         buffer = read_object_file(&item->object.oid, &type, &size);
441         if (!buffer)
442                 return quiet_on_missing ? -1 :
443                         error("Could not read %s",
444                              oid_to_hex(&item->object.oid));
445         if (type != OBJ_COMMIT) {
446                 free(buffer);
447                 return error("Object %s not a commit",
448                              oid_to_hex(&item->object.oid));
449         }
450         ret = parse_commit_buffer(item, buffer, size, 0);
451         if (save_commit_buffer && !ret) {
452                 set_commit_buffer(item, buffer, size);
453                 return 0;
454         }
455         free(buffer);
456         return ret;
457 }
458
459 void parse_commit_or_die(struct commit *item)
460 {
461         if (parse_commit(item))
462                 die("unable to parse commit %s",
463                     item ? oid_to_hex(&item->object.oid) : "(null)");
464 }
465
466 int find_commit_subject(const char *commit_buffer, const char **subject)
467 {
468         const char *eol;
469         const char *p = commit_buffer;
470
471         while (*p && (*p != '\n' || p[1] != '\n'))
472                 p++;
473         if (*p) {
474                 p = skip_blank_lines(p + 2);
475                 eol = strchrnul(p, '\n');
476         } else
477                 eol = p;
478
479         *subject = p;
480
481         return eol - p;
482 }
483
484 struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p)
485 {
486         struct commit_list *new_list = xmalloc(sizeof(struct commit_list));
487         new_list->item = item;
488         new_list->next = *list_p;
489         *list_p = new_list;
490         return new_list;
491 }
492
493 unsigned commit_list_count(const struct commit_list *l)
494 {
495         unsigned c = 0;
496         for (; l; l = l->next )
497                 c++;
498         return c;
499 }
500
501 struct commit_list *copy_commit_list(struct commit_list *list)
502 {
503         struct commit_list *head = NULL;
504         struct commit_list **pp = &head;
505         while (list) {
506                 pp = commit_list_append(list->item, pp);
507                 list = list->next;
508         }
509         return head;
510 }
511
512 void free_commit_list(struct commit_list *list)
513 {
514         while (list)
515                 pop_commit(&list);
516 }
517
518 struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
519 {
520         struct commit_list **pp = list;
521         struct commit_list *p;
522         while ((p = *pp) != NULL) {
523                 if (p->item->date < item->date) {
524                         break;
525                 }
526                 pp = &p->next;
527         }
528         return commit_list_insert(item, pp);
529 }
530
531 static int commit_list_compare_by_date(const void *a, const void *b)
532 {
533         timestamp_t a_date = ((const struct commit_list *)a)->item->date;
534         timestamp_t b_date = ((const struct commit_list *)b)->item->date;
535         if (a_date < b_date)
536                 return 1;
537         if (a_date > b_date)
538                 return -1;
539         return 0;
540 }
541
542 static void *commit_list_get_next(const void *a)
543 {
544         return ((const struct commit_list *)a)->next;
545 }
546
547 static void commit_list_set_next(void *a, void *next)
548 {
549         ((struct commit_list *)a)->next = next;
550 }
551
552 void commit_list_sort_by_date(struct commit_list **list)
553 {
554         *list = llist_mergesort(*list, commit_list_get_next, commit_list_set_next,
555                                 commit_list_compare_by_date);
556 }
557
558 struct commit *pop_most_recent_commit(struct commit_list **list,
559                                       unsigned int mark)
560 {
561         struct commit *ret = pop_commit(list);
562         struct commit_list *parents = ret->parents;
563
564         while (parents) {
565                 struct commit *commit = parents->item;
566                 if (!parse_commit(commit) && !(commit->object.flags & mark)) {
567                         commit->object.flags |= mark;
568                         commit_list_insert_by_date(commit, list);
569                 }
570                 parents = parents->next;
571         }
572         return ret;
573 }
574
575 static void clear_commit_marks_1(struct commit_list **plist,
576                                  struct commit *commit, unsigned int mark)
577 {
578         while (commit) {
579                 struct commit_list *parents;
580
581                 if (!(mark & commit->object.flags))
582                         return;
583
584                 commit->object.flags &= ~mark;
585
586                 parents = commit->parents;
587                 if (!parents)
588                         return;
589
590                 while ((parents = parents->next))
591                         commit_list_insert(parents->item, plist);
592
593                 commit = commit->parents->item;
594         }
595 }
596
597 void clear_commit_marks_many(int nr, struct commit **commit, unsigned int mark)
598 {
599         struct commit_list *list = NULL;
600
601         while (nr--) {
602                 clear_commit_marks_1(&list, *commit, mark);
603                 commit++;
604         }
605         while (list)
606                 clear_commit_marks_1(&list, pop_commit(&list), mark);
607 }
608
609 void clear_commit_marks(struct commit *commit, unsigned int mark)
610 {
611         clear_commit_marks_many(1, &commit, mark);
612 }
613
614 struct commit *pop_commit(struct commit_list **stack)
615 {
616         struct commit_list *top = *stack;
617         struct commit *item = top ? top->item : NULL;
618
619         if (top) {
620                 *stack = top->next;
621                 free(top);
622         }
623         return item;
624 }
625
626 /*
627  * Topological sort support
628  */
629
630 /* count number of children that have not been emitted */
631 define_commit_slab(indegree_slab, int);
632
633 /* record author-date for each commit object */
634 define_commit_slab(author_date_slab, unsigned long);
635
636 static void record_author_date(struct author_date_slab *author_date,
637                                struct commit *commit)
638 {
639         const char *buffer = get_commit_buffer(commit, NULL);
640         struct ident_split ident;
641         const char *ident_line;
642         size_t ident_len;
643         char *date_end;
644         timestamp_t date;
645
646         ident_line = find_commit_header(buffer, "author", &ident_len);
647         if (!ident_line)
648                 goto fail_exit; /* no author line */
649         if (split_ident_line(&ident, ident_line, ident_len) ||
650             !ident.date_begin || !ident.date_end)
651                 goto fail_exit; /* malformed "author" line */
652
653         date = parse_timestamp(ident.date_begin, &date_end, 10);
654         if (date_end != ident.date_end)
655                 goto fail_exit; /* malformed date */
656         *(author_date_slab_at(author_date, commit)) = date;
657
658 fail_exit:
659         unuse_commit_buffer(commit, buffer);
660 }
661
662 static int compare_commits_by_author_date(const void *a_, const void *b_,
663                                           void *cb_data)
664 {
665         const struct commit *a = a_, *b = b_;
666         struct author_date_slab *author_date = cb_data;
667         timestamp_t a_date = *(author_date_slab_at(author_date, a));
668         timestamp_t b_date = *(author_date_slab_at(author_date, b));
669
670         /* newer commits with larger date first */
671         if (a_date < b_date)
672                 return 1;
673         else if (a_date > b_date)
674                 return -1;
675         return 0;
676 }
677
678 int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused)
679 {
680         const struct commit *a = a_, *b = b_;
681
682         /* newer commits first */
683         if (a->generation < b->generation)
684                 return 1;
685         else if (a->generation > b->generation)
686                 return -1;
687
688         /* use date as a heuristic when generations are equal */
689         if (a->date < b->date)
690                 return 1;
691         else if (a->date > b->date)
692                 return -1;
693         return 0;
694 }
695
696 int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused)
697 {
698         const struct commit *a = a_, *b = b_;
699         /* newer commits with larger date first */
700         if (a->date < b->date)
701                 return 1;
702         else if (a->date > b->date)
703                 return -1;
704         return 0;
705 }
706
707 /*
708  * Performs an in-place topological sort on the list supplied.
709  */
710 void sort_in_topological_order(struct commit_list **list, enum rev_sort_order sort_order)
711 {
712         struct commit_list *next, *orig = *list;
713         struct commit_list **pptr;
714         struct indegree_slab indegree;
715         struct prio_queue queue;
716         struct commit *commit;
717         struct author_date_slab author_date;
718
719         if (!orig)
720                 return;
721         *list = NULL;
722
723         init_indegree_slab(&indegree);
724         memset(&queue, '\0', sizeof(queue));
725
726         switch (sort_order) {
727         default: /* REV_SORT_IN_GRAPH_ORDER */
728                 queue.compare = NULL;
729                 break;
730         case REV_SORT_BY_COMMIT_DATE:
731                 queue.compare = compare_commits_by_commit_date;
732                 break;
733         case REV_SORT_BY_AUTHOR_DATE:
734                 init_author_date_slab(&author_date);
735                 queue.compare = compare_commits_by_author_date;
736                 queue.cb_data = &author_date;
737                 break;
738         }
739
740         /* Mark them and clear the indegree */
741         for (next = orig; next; next = next->next) {
742                 struct commit *commit = next->item;
743                 *(indegree_slab_at(&indegree, commit)) = 1;
744                 /* also record the author dates, if needed */
745                 if (sort_order == REV_SORT_BY_AUTHOR_DATE)
746                         record_author_date(&author_date, commit);
747         }
748
749         /* update the indegree */
750         for (next = orig; next; next = next->next) {
751                 struct commit_list *parents = next->item->parents;
752                 while (parents) {
753                         struct commit *parent = parents->item;
754                         int *pi = indegree_slab_at(&indegree, parent);
755
756                         if (*pi)
757                                 (*pi)++;
758                         parents = parents->next;
759                 }
760         }
761
762         /*
763          * find the tips
764          *
765          * tips are nodes not reachable from any other node in the list
766          *
767          * the tips serve as a starting set for the work queue.
768          */
769         for (next = orig; next; next = next->next) {
770                 struct commit *commit = next->item;
771
772                 if (*(indegree_slab_at(&indegree, commit)) == 1)
773                         prio_queue_put(&queue, commit);
774         }
775
776         /*
777          * This is unfortunate; the initial tips need to be shown
778          * in the order given from the revision traversal machinery.
779          */
780         if (sort_order == REV_SORT_IN_GRAPH_ORDER)
781                 prio_queue_reverse(&queue);
782
783         /* We no longer need the commit list */
784         free_commit_list(orig);
785
786         pptr = list;
787         *list = NULL;
788         while ((commit = prio_queue_get(&queue)) != NULL) {
789                 struct commit_list *parents;
790
791                 for (parents = commit->parents; parents ; parents = parents->next) {
792                         struct commit *parent = parents->item;
793                         int *pi = indegree_slab_at(&indegree, parent);
794
795                         if (!*pi)
796                                 continue;
797
798                         /*
799                          * parents are only enqueued for emission
800                          * when all their children have been emitted thereby
801                          * guaranteeing topological order.
802                          */
803                         if (--(*pi) == 1)
804                                 prio_queue_put(&queue, parent);
805                 }
806                 /*
807                  * all children of commit have already been
808                  * emitted. we can emit it now.
809                  */
810                 *(indegree_slab_at(&indegree, commit)) = 0;
811
812                 pptr = &commit_list_insert(commit, pptr)->next;
813         }
814
815         clear_indegree_slab(&indegree);
816         clear_prio_queue(&queue);
817         if (sort_order == REV_SORT_BY_AUTHOR_DATE)
818                 clear_author_date_slab(&author_date);
819 }
820
821 /* merge-base stuff */
822
823 /* Remember to update object flag allocation in object.h */
824 #define PARENT1         (1u<<16)
825 #define PARENT2         (1u<<17)
826 #define STALE           (1u<<18)
827 #define RESULT          (1u<<19)
828
829 static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
830
831 static int queue_has_nonstale(struct prio_queue *queue)
832 {
833         int i;
834         for (i = 0; i < queue->nr; i++) {
835                 struct commit *commit = queue->array[i].data;
836                 if (!(commit->object.flags & STALE))
837                         return 1;
838         }
839         return 0;
840 }
841
842 /* all input commits in one and twos[] must have been parsed! */
843 static struct commit_list *paint_down_to_common(struct commit *one, int n,
844                                                 struct commit **twos,
845                                                 int min_generation)
846 {
847         struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
848         struct commit_list *result = NULL;
849         int i;
850         uint32_t last_gen = GENERATION_NUMBER_INFINITY;
851
852         one->object.flags |= PARENT1;
853         if (!n) {
854                 commit_list_append(one, &result);
855                 return result;
856         }
857         prio_queue_put(&queue, one);
858
859         for (i = 0; i < n; i++) {
860                 twos[i]->object.flags |= PARENT2;
861                 prio_queue_put(&queue, twos[i]);
862         }
863
864         while (queue_has_nonstale(&queue)) {
865                 struct commit *commit = prio_queue_get(&queue);
866                 struct commit_list *parents;
867                 int flags;
868
869                 if (commit->generation > last_gen)
870                         BUG("bad generation skip %8x > %8x at %s",
871                             commit->generation, last_gen,
872                             oid_to_hex(&commit->object.oid));
873                 last_gen = commit->generation;
874
875                 if (commit->generation < min_generation)
876                         break;
877
878                 flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
879                 if (flags == (PARENT1 | PARENT2)) {
880                         if (!(commit->object.flags & RESULT)) {
881                                 commit->object.flags |= RESULT;
882                                 commit_list_insert_by_date(commit, &result);
883                         }
884                         /* Mark parents of a found merge stale */
885                         flags |= STALE;
886                 }
887                 parents = commit->parents;
888                 while (parents) {
889                         struct commit *p = parents->item;
890                         parents = parents->next;
891                         if ((p->object.flags & flags) == flags)
892                                 continue;
893                         if (parse_commit(p))
894                                 return NULL;
895                         p->object.flags |= flags;
896                         prio_queue_put(&queue, p);
897                 }
898         }
899
900         clear_prio_queue(&queue);
901         return result;
902 }
903
904 static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
905 {
906         struct commit_list *list = NULL;
907         struct commit_list *result = NULL;
908         int i;
909
910         for (i = 0; i < n; i++) {
911                 if (one == twos[i])
912                         /*
913                          * We do not mark this even with RESULT so we do not
914                          * have to clean it up.
915                          */
916                         return commit_list_insert(one, &result);
917         }
918
919         if (parse_commit(one))
920                 return NULL;
921         for (i = 0; i < n; i++) {
922                 if (parse_commit(twos[i]))
923                         return NULL;
924         }
925
926         list = paint_down_to_common(one, n, twos, 0);
927
928         while (list) {
929                 struct commit *commit = pop_commit(&list);
930                 if (!(commit->object.flags & STALE))
931                         commit_list_insert_by_date(commit, &result);
932         }
933         return result;
934 }
935
936 struct commit_list *get_octopus_merge_bases(struct commit_list *in)
937 {
938         struct commit_list *i, *j, *k, *ret = NULL;
939
940         if (!in)
941                 return ret;
942
943         commit_list_insert(in->item, &ret);
944
945         for (i = in->next; i; i = i->next) {
946                 struct commit_list *new_commits = NULL, *end = NULL;
947
948                 for (j = ret; j; j = j->next) {
949                         struct commit_list *bases;
950                         bases = get_merge_bases(i->item, j->item);
951                         if (!new_commits)
952                                 new_commits = bases;
953                         else
954                                 end->next = bases;
955                         for (k = bases; k; k = k->next)
956                                 end = k;
957                 }
958                 ret = new_commits;
959         }
960         return ret;
961 }
962
963 static int remove_redundant(struct commit **array, int cnt)
964 {
965         /*
966          * Some commit in the array may be an ancestor of
967          * another commit.  Move such commit to the end of
968          * the array, and return the number of commits that
969          * are independent from each other.
970          */
971         struct commit **work;
972         unsigned char *redundant;
973         int *filled_index;
974         int i, j, filled;
975
976         work = xcalloc(cnt, sizeof(*work));
977         redundant = xcalloc(cnt, 1);
978         ALLOC_ARRAY(filled_index, cnt - 1);
979
980         for (i = 0; i < cnt; i++)
981                 parse_commit(array[i]);
982         for (i = 0; i < cnt; i++) {
983                 struct commit_list *common;
984                 uint32_t min_generation = array[i]->generation;
985
986                 if (redundant[i])
987                         continue;
988                 for (j = filled = 0; j < cnt; j++) {
989                         if (i == j || redundant[j])
990                                 continue;
991                         filled_index[filled] = j;
992                         work[filled++] = array[j];
993
994                         if (array[j]->generation < min_generation)
995                                 min_generation = array[j]->generation;
996                 }
997                 common = paint_down_to_common(array[i], filled, work,
998                                               min_generation);
999                 if (array[i]->object.flags & PARENT2)
1000                         redundant[i] = 1;
1001                 for (j = 0; j < filled; j++)
1002                         if (work[j]->object.flags & PARENT1)
1003                                 redundant[filled_index[j]] = 1;
1004                 clear_commit_marks(array[i], all_flags);
1005                 clear_commit_marks_many(filled, work, all_flags);
1006                 free_commit_list(common);
1007         }
1008
1009         /* Now collect the result */
1010         COPY_ARRAY(work, array, cnt);
1011         for (i = filled = 0; i < cnt; i++)
1012                 if (!redundant[i])
1013                         array[filled++] = work[i];
1014         for (j = filled, i = 0; i < cnt; i++)
1015                 if (redundant[i])
1016                         array[j++] = work[i];
1017         free(work);
1018         free(redundant);
1019         free(filled_index);
1020         return filled;
1021 }
1022
1023 static struct commit_list *get_merge_bases_many_0(struct commit *one,
1024                                                   int n,
1025                                                   struct commit **twos,
1026                                                   int cleanup)
1027 {
1028         struct commit_list *list;
1029         struct commit **rslt;
1030         struct commit_list *result;
1031         int cnt, i;
1032
1033         result = merge_bases_many(one, n, twos);
1034         for (i = 0; i < n; i++) {
1035                 if (one == twos[i])
1036                         return result;
1037         }
1038         if (!result || !result->next) {
1039                 if (cleanup) {
1040                         clear_commit_marks(one, all_flags);
1041                         clear_commit_marks_many(n, twos, all_flags);
1042                 }
1043                 return result;
1044         }
1045
1046         /* There are more than one */
1047         cnt = commit_list_count(result);
1048         rslt = xcalloc(cnt, sizeof(*rslt));
1049         for (list = result, i = 0; list; list = list->next)
1050                 rslt[i++] = list->item;
1051         free_commit_list(result);
1052
1053         clear_commit_marks(one, all_flags);
1054         clear_commit_marks_many(n, twos, all_flags);
1055
1056         cnt = remove_redundant(rslt, cnt);
1057         result = NULL;
1058         for (i = 0; i < cnt; i++)
1059                 commit_list_insert_by_date(rslt[i], &result);
1060         free(rslt);
1061         return result;
1062 }
1063
1064 struct commit_list *get_merge_bases_many(struct commit *one,
1065                                          int n,
1066                                          struct commit **twos)
1067 {
1068         return get_merge_bases_many_0(one, n, twos, 1);
1069 }
1070
1071 struct commit_list *get_merge_bases_many_dirty(struct commit *one,
1072                                                int n,
1073                                                struct commit **twos)
1074 {
1075         return get_merge_bases_many_0(one, n, twos, 0);
1076 }
1077
1078 struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
1079 {
1080         return get_merge_bases_many_0(one, 1, &two, 1);
1081 }
1082
1083 /*
1084  * Is "commit" a descendant of one of the elements on the "with_commit" list?
1085  */
1086 int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
1087 {
1088         if (!with_commit)
1089                 return 1;
1090         while (with_commit) {
1091                 struct commit *other;
1092
1093                 other = with_commit->item;
1094                 with_commit = with_commit->next;
1095                 if (in_merge_bases(other, commit))
1096                         return 1;
1097         }
1098         return 0;
1099 }
1100
1101 /*
1102  * Is "commit" an ancestor of one of the "references"?
1103  */
1104 int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
1105 {
1106         struct commit_list *bases;
1107         int ret = 0, i;
1108         uint32_t min_generation = GENERATION_NUMBER_INFINITY;
1109
1110         if (parse_commit(commit))
1111                 return ret;
1112         for (i = 0; i < nr_reference; i++) {
1113                 if (parse_commit(reference[i]))
1114                         return ret;
1115                 if (reference[i]->generation < min_generation)
1116                         min_generation = reference[i]->generation;
1117         }
1118
1119         if (commit->generation > min_generation)
1120                 return ret;
1121
1122         bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
1123         if (commit->object.flags & PARENT2)
1124                 ret = 1;
1125         clear_commit_marks(commit, all_flags);
1126         clear_commit_marks_many(nr_reference, reference, all_flags);
1127         free_commit_list(bases);
1128         return ret;
1129 }
1130
1131 /*
1132  * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
1133  */
1134 int in_merge_bases(struct commit *commit, struct commit *reference)
1135 {
1136         return in_merge_bases_many(commit, 1, &reference);
1137 }
1138
1139 struct commit_list *reduce_heads(struct commit_list *heads)
1140 {
1141         struct commit_list *p;
1142         struct commit_list *result = NULL, **tail = &result;
1143         struct commit **array;
1144         int num_head, i;
1145
1146         if (!heads)
1147                 return NULL;
1148
1149         /* Uniquify */
1150         for (p = heads; p; p = p->next)
1151                 p->item->object.flags &= ~STALE;
1152         for (p = heads, num_head = 0; p; p = p->next) {
1153                 if (p->item->object.flags & STALE)
1154                         continue;
1155                 p->item->object.flags |= STALE;
1156                 num_head++;
1157         }
1158         array = xcalloc(num_head, sizeof(*array));
1159         for (p = heads, i = 0; p; p = p->next) {
1160                 if (p->item->object.flags & STALE) {
1161                         array[i++] = p->item;
1162                         p->item->object.flags &= ~STALE;
1163                 }
1164         }
1165         num_head = remove_redundant(array, num_head);
1166         for (i = 0; i < num_head; i++)
1167                 tail = &commit_list_insert(array[i], tail)->next;
1168         free(array);
1169         return result;
1170 }
1171
1172 void reduce_heads_replace(struct commit_list **heads)
1173 {
1174         struct commit_list *result = reduce_heads(*heads);
1175         free_commit_list(*heads);
1176         *heads = result;
1177 }
1178
1179 static const char gpg_sig_header[] = "gpgsig";
1180 static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
1181
1182 static int do_sign_commit(struct strbuf *buf, const char *keyid)
1183 {
1184         struct strbuf sig = STRBUF_INIT;
1185         int inspos, copypos;
1186         const char *eoh;
1187
1188         /* find the end of the header */
1189         eoh = strstr(buf->buf, "\n\n");
1190         if (!eoh)
1191                 inspos = buf->len;
1192         else
1193                 inspos = eoh - buf->buf + 1;
1194
1195         if (!keyid || !*keyid)
1196                 keyid = get_signing_key();
1197         if (sign_buffer(buf, &sig, keyid)) {
1198                 strbuf_release(&sig);
1199                 return -1;
1200         }
1201
1202         for (copypos = 0; sig.buf[copypos]; ) {
1203                 const char *bol = sig.buf + copypos;
1204                 const char *eol = strchrnul(bol, '\n');
1205                 int len = (eol - bol) + !!*eol;
1206
1207                 if (!copypos) {
1208                         strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len);
1209                         inspos += gpg_sig_header_len;
1210                 }
1211                 strbuf_insert(buf, inspos++, " ", 1);
1212                 strbuf_insert(buf, inspos, bol, len);
1213                 inspos += len;
1214                 copypos += len;
1215         }
1216         strbuf_release(&sig);
1217         return 0;
1218 }
1219
1220 int parse_signed_commit(const struct commit *commit,
1221                         struct strbuf *payload, struct strbuf *signature)
1222 {
1223
1224         unsigned long size;
1225         const char *buffer = get_commit_buffer(commit, &size);
1226         int in_signature, saw_signature = -1;
1227         const char *line, *tail;
1228
1229         line = buffer;
1230         tail = buffer + size;
1231         in_signature = 0;
1232         saw_signature = 0;
1233         while (line < tail) {
1234                 const char *sig = NULL;
1235                 const char *next = memchr(line, '\n', tail - line);
1236
1237                 next = next ? next + 1 : tail;
1238                 if (in_signature && line[0] == ' ')
1239                         sig = line + 1;
1240                 else if (starts_with(line, gpg_sig_header) &&
1241                          line[gpg_sig_header_len] == ' ')
1242                         sig = line + gpg_sig_header_len + 1;
1243                 if (sig) {
1244                         strbuf_add(signature, sig, next - sig);
1245                         saw_signature = 1;
1246                         in_signature = 1;
1247                 } else {
1248                         if (*line == '\n')
1249                                 /* dump the whole remainder of the buffer */
1250                                 next = tail;
1251                         strbuf_add(payload, line, next - line);
1252                         in_signature = 0;
1253                 }
1254                 line = next;
1255         }
1256         unuse_commit_buffer(commit, buffer);
1257         return saw_signature;
1258 }
1259
1260 int remove_signature(struct strbuf *buf)
1261 {
1262         const char *line = buf->buf;
1263         const char *tail = buf->buf + buf->len;
1264         int in_signature = 0;
1265         const char *sig_start = NULL;
1266         const char *sig_end = NULL;
1267
1268         while (line < tail) {
1269                 const char *next = memchr(line, '\n', tail - line);
1270                 next = next ? next + 1 : tail;
1271
1272                 if (in_signature && line[0] == ' ')
1273                         sig_end = next;
1274                 else if (starts_with(line, gpg_sig_header) &&
1275                          line[gpg_sig_header_len] == ' ') {
1276                         sig_start = line;
1277                         sig_end = next;
1278                         in_signature = 1;
1279                 } else {
1280                         if (*line == '\n')
1281                                 /* dump the whole remainder of the buffer */
1282                                 next = tail;
1283                         in_signature = 0;
1284                 }
1285                 line = next;
1286         }
1287
1288         if (sig_start)
1289                 strbuf_remove(buf, sig_start - buf->buf, sig_end - sig_start);
1290
1291         return sig_start != NULL;
1292 }
1293
1294 static void handle_signed_tag(struct commit *parent, struct commit_extra_header ***tail)
1295 {
1296         struct merge_remote_desc *desc;
1297         struct commit_extra_header *mergetag;
1298         char *buf;
1299         unsigned long size, len;
1300         enum object_type type;
1301
1302         desc = merge_remote_util(parent);
1303         if (!desc || !desc->obj)
1304                 return;
1305         buf = read_object_file(&desc->obj->oid, &type, &size);
1306         if (!buf || type != OBJ_TAG)
1307                 goto free_return;
1308         len = parse_signature(buf, size);
1309         if (size == len)
1310                 goto free_return;
1311         /*
1312          * We could verify this signature and either omit the tag when
1313          * it does not validate, but the integrator may not have the
1314          * public key of the signer of the tag he is merging, while a
1315          * later auditor may have it while auditing, so let's not run
1316          * verify-signed-buffer here for now...
1317          *
1318          * if (verify_signed_buffer(buf, len, buf + len, size - len, ...))
1319          *      warn("warning: signed tag unverified.");
1320          */
1321         mergetag = xcalloc(1, sizeof(*mergetag));
1322         mergetag->key = xstrdup("mergetag");
1323         mergetag->value = buf;
1324         mergetag->len = size;
1325
1326         **tail = mergetag;
1327         *tail = &mergetag->next;
1328         return;
1329
1330 free_return:
1331         free(buf);
1332 }
1333
1334 int check_commit_signature(const struct commit *commit, struct signature_check *sigc)
1335 {
1336         struct strbuf payload = STRBUF_INIT;
1337         struct strbuf signature = STRBUF_INIT;
1338         int ret = 1;
1339
1340         sigc->result = 'N';
1341
1342         if (parse_signed_commit(commit, &payload, &signature) <= 0)
1343                 goto out;
1344         ret = check_signature(payload.buf, payload.len, signature.buf,
1345                 signature.len, sigc);
1346
1347  out:
1348         strbuf_release(&payload);
1349         strbuf_release(&signature);
1350
1351         return ret;
1352 }
1353
1354
1355
1356 void append_merge_tag_headers(struct commit_list *parents,
1357                               struct commit_extra_header ***tail)
1358 {
1359         while (parents) {
1360                 struct commit *parent = parents->item;
1361                 handle_signed_tag(parent, tail);
1362                 parents = parents->next;
1363         }
1364 }
1365
1366 static void add_extra_header(struct strbuf *buffer,
1367                              struct commit_extra_header *extra)
1368 {
1369         strbuf_addstr(buffer, extra->key);
1370         if (extra->len)
1371                 strbuf_add_lines(buffer, " ", extra->value, extra->len);
1372         else
1373                 strbuf_addch(buffer, '\n');
1374 }
1375
1376 struct commit_extra_header *read_commit_extra_headers(struct commit *commit,
1377                                                       const char **exclude)
1378 {
1379         struct commit_extra_header *extra = NULL;
1380         unsigned long size;
1381         const char *buffer = get_commit_buffer(commit, &size);
1382         extra = read_commit_extra_header_lines(buffer, size, exclude);
1383         unuse_commit_buffer(commit, buffer);
1384         return extra;
1385 }
1386
1387 int for_each_mergetag(each_mergetag_fn fn, struct commit *commit, void *data)
1388 {
1389         struct commit_extra_header *extra, *to_free;
1390         int res = 0;
1391
1392         to_free = read_commit_extra_headers(commit, NULL);
1393         for (extra = to_free; !res && extra; extra = extra->next) {
1394                 if (strcmp(extra->key, "mergetag"))
1395                         continue; /* not a merge tag */
1396                 res = fn(commit, extra, data);
1397         }
1398         free_commit_extra_headers(to_free);
1399         return res;
1400 }
1401
1402 static inline int standard_header_field(const char *field, size_t len)
1403 {
1404         return ((len == 4 && !memcmp(field, "tree", 4)) ||
1405                 (len == 6 && !memcmp(field, "parent", 6)) ||
1406                 (len == 6 && !memcmp(field, "author", 6)) ||
1407                 (len == 9 && !memcmp(field, "committer", 9)) ||
1408                 (len == 8 && !memcmp(field, "encoding", 8)));
1409 }
1410
1411 static int excluded_header_field(const char *field, size_t len, const char **exclude)
1412 {
1413         if (!exclude)
1414                 return 0;
1415
1416         while (*exclude) {
1417                 size_t xlen = strlen(*exclude);
1418                 if (len == xlen && !memcmp(field, *exclude, xlen))
1419                         return 1;
1420                 exclude++;
1421         }
1422         return 0;
1423 }
1424
1425 static struct commit_extra_header *read_commit_extra_header_lines(
1426         const char *buffer, size_t size,
1427         const char **exclude)
1428 {
1429         struct commit_extra_header *extra = NULL, **tail = &extra, *it = NULL;
1430         const char *line, *next, *eof, *eob;
1431         struct strbuf buf = STRBUF_INIT;
1432
1433         for (line = buffer, eob = line + size;
1434              line < eob && *line != '\n';
1435              line = next) {
1436                 next = memchr(line, '\n', eob - line);
1437                 next = next ? next + 1 : eob;
1438                 if (*line == ' ') {
1439                         /* continuation */
1440                         if (it)
1441                                 strbuf_add(&buf, line + 1, next - (line + 1));
1442                         continue;
1443                 }
1444                 if (it)
1445                         it->value = strbuf_detach(&buf, &it->len);
1446                 strbuf_reset(&buf);
1447                 it = NULL;
1448
1449                 eof = memchr(line, ' ', next - line);
1450                 if (!eof)
1451                         eof = next;
1452                 else if (standard_header_field(line, eof - line) ||
1453                          excluded_header_field(line, eof - line, exclude))
1454                         continue;
1455
1456                 it = xcalloc(1, sizeof(*it));
1457                 it->key = xmemdupz(line, eof-line);
1458                 *tail = it;
1459                 tail = &it->next;
1460                 if (eof + 1 < next)
1461                         strbuf_add(&buf, eof + 1, next - (eof + 1));
1462         }
1463         if (it)
1464                 it->value = strbuf_detach(&buf, &it->len);
1465         return extra;
1466 }
1467
1468 void free_commit_extra_headers(struct commit_extra_header *extra)
1469 {
1470         while (extra) {
1471                 struct commit_extra_header *next = extra->next;
1472                 free(extra->key);
1473                 free(extra->value);
1474                 free(extra);
1475                 extra = next;
1476         }
1477 }
1478
1479 int commit_tree(const char *msg, size_t msg_len, const struct object_id *tree,
1480                 struct commit_list *parents, struct object_id *ret,
1481                 const char *author, const char *sign_commit)
1482 {
1483         struct commit_extra_header *extra = NULL, **tail = &extra;
1484         int result;
1485
1486         append_merge_tag_headers(parents, &tail);
1487         result = commit_tree_extended(msg, msg_len, tree, parents, ret,
1488                                       author, sign_commit, extra);
1489         free_commit_extra_headers(extra);
1490         return result;
1491 }
1492
1493 static int find_invalid_utf8(const char *buf, int len)
1494 {
1495         int offset = 0;
1496         static const unsigned int max_codepoint[] = {
1497                 0x7f, 0x7ff, 0xffff, 0x10ffff
1498         };
1499
1500         while (len) {
1501                 unsigned char c = *buf++;
1502                 int bytes, bad_offset;
1503                 unsigned int codepoint;
1504                 unsigned int min_val, max_val;
1505
1506                 len--;
1507                 offset++;
1508
1509                 /* Simple US-ASCII? No worries. */
1510                 if (c < 0x80)
1511                         continue;
1512
1513                 bad_offset = offset-1;
1514
1515                 /*
1516                  * Count how many more high bits set: that's how
1517                  * many more bytes this sequence should have.
1518                  */
1519                 bytes = 0;
1520                 while (c & 0x40) {
1521                         c <<= 1;
1522                         bytes++;
1523                 }
1524
1525                 /*
1526                  * Must be between 1 and 3 more bytes.  Longer sequences result in
1527                  * codepoints beyond U+10FFFF, which are guaranteed never to exist.
1528                  */
1529                 if (bytes < 1 || 3 < bytes)
1530                         return bad_offset;
1531
1532                 /* Do we *have* that many bytes? */
1533                 if (len < bytes)
1534                         return bad_offset;
1535
1536                 /*
1537                  * Place the encoded bits at the bottom of the value and compute the
1538                  * valid range.
1539                  */
1540                 codepoint = (c & 0x7f) >> bytes;
1541                 min_val = max_codepoint[bytes-1] + 1;
1542                 max_val = max_codepoint[bytes];
1543
1544                 offset += bytes;
1545                 len -= bytes;
1546
1547                 /* And verify that they are good continuation bytes */
1548                 do {
1549                         codepoint <<= 6;
1550                         codepoint |= *buf & 0x3f;
1551                         if ((*buf++ & 0xc0) != 0x80)
1552                                 return bad_offset;
1553                 } while (--bytes);
1554
1555                 /* Reject codepoints that are out of range for the sequence length. */
1556                 if (codepoint < min_val || codepoint > max_val)
1557                         return bad_offset;
1558                 /* Surrogates are only for UTF-16 and cannot be encoded in UTF-8. */
1559                 if ((codepoint & 0x1ff800) == 0xd800)
1560                         return bad_offset;
1561                 /* U+xxFFFE and U+xxFFFF are guaranteed non-characters. */
1562                 if ((codepoint & 0xfffe) == 0xfffe)
1563                         return bad_offset;
1564                 /* So are anything in the range U+FDD0..U+FDEF. */
1565                 if (codepoint >= 0xfdd0 && codepoint <= 0xfdef)
1566                         return bad_offset;
1567         }
1568         return -1;
1569 }
1570
1571 /*
1572  * This verifies that the buffer is in proper utf8 format.
1573  *
1574  * If it isn't, it assumes any non-utf8 characters are Latin1,
1575  * and does the conversion.
1576  */
1577 static int verify_utf8(struct strbuf *buf)
1578 {
1579         int ok = 1;
1580         long pos = 0;
1581
1582         for (;;) {
1583                 int bad;
1584                 unsigned char c;
1585                 unsigned char replace[2];
1586
1587                 bad = find_invalid_utf8(buf->buf + pos, buf->len - pos);
1588                 if (bad < 0)
1589                         return ok;
1590                 pos += bad;
1591                 ok = 0;
1592                 c = buf->buf[pos];
1593                 strbuf_remove(buf, pos, 1);
1594
1595                 /* We know 'c' must be in the range 128-255 */
1596                 replace[0] = 0xc0 + (c >> 6);
1597                 replace[1] = 0x80 + (c & 0x3f);
1598                 strbuf_insert(buf, pos, replace, 2);
1599                 pos += 2;
1600         }
1601 }
1602
1603 static const char commit_utf8_warn[] =
1604 N_("Warning: commit message did not conform to UTF-8.\n"
1605    "You may want to amend it after fixing the message, or set the config\n"
1606    "variable i18n.commitencoding to the encoding your project uses.\n");
1607
1608 int commit_tree_extended(const char *msg, size_t msg_len,
1609                          const struct object_id *tree,
1610                          struct commit_list *parents, struct object_id *ret,
1611                          const char *author, const char *sign_commit,
1612                          struct commit_extra_header *extra)
1613 {
1614         int result;
1615         int encoding_is_utf8;
1616         struct strbuf buffer;
1617
1618         assert_oid_type(tree, OBJ_TREE);
1619
1620         if (memchr(msg, '\0', msg_len))
1621                 return error("a NUL byte in commit log message not allowed.");
1622
1623         /* Not having i18n.commitencoding is the same as having utf-8 */
1624         encoding_is_utf8 = is_encoding_utf8(git_commit_encoding);
1625
1626         strbuf_init(&buffer, 8192); /* should avoid reallocs for the headers */
1627         strbuf_addf(&buffer, "tree %s\n", oid_to_hex(tree));
1628
1629         /*
1630          * NOTE! This ordering means that the same exact tree merged with a
1631          * different order of parents will be a _different_ changeset even
1632          * if everything else stays the same.
1633          */
1634         while (parents) {
1635                 struct commit *parent = pop_commit(&parents);
1636                 strbuf_addf(&buffer, "parent %s\n",
1637                             oid_to_hex(&parent->object.oid));
1638         }
1639
1640         /* Person/date information */
1641         if (!author)
1642                 author = git_author_info(IDENT_STRICT);
1643         strbuf_addf(&buffer, "author %s\n", author);
1644         strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_STRICT));
1645         if (!encoding_is_utf8)
1646                 strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding);
1647
1648         while (extra) {
1649                 add_extra_header(&buffer, extra);
1650                 extra = extra->next;
1651         }
1652         strbuf_addch(&buffer, '\n');
1653
1654         /* And add the comment */
1655         strbuf_add(&buffer, msg, msg_len);
1656
1657         /* And check the encoding */
1658         if (encoding_is_utf8 && !verify_utf8(&buffer))
1659                 fprintf(stderr, _(commit_utf8_warn));
1660
1661         if (sign_commit && do_sign_commit(&buffer, sign_commit)) {
1662                 result = -1;
1663                 goto out;
1664         }
1665
1666         result = write_object_file(buffer.buf, buffer.len, commit_type, ret);
1667 out:
1668         strbuf_release(&buffer);
1669         return result;
1670 }
1671
1672 define_commit_slab(merge_desc_slab, struct merge_remote_desc *);
1673 static struct merge_desc_slab merge_desc_slab = COMMIT_SLAB_INIT(1, merge_desc_slab);
1674
1675 struct merge_remote_desc *merge_remote_util(struct commit *commit)
1676 {
1677         return *merge_desc_slab_at(&merge_desc_slab, commit);
1678 }
1679
1680 void set_merge_remote_desc(struct commit *commit,
1681                            const char *name, struct object *obj)
1682 {
1683         struct merge_remote_desc *desc;
1684         FLEX_ALLOC_STR(desc, name, name);
1685         desc->obj = obj;
1686         *merge_desc_slab_at(&merge_desc_slab, commit) = desc;
1687 }
1688
1689 struct commit *get_merge_parent(const char *name)
1690 {
1691         struct object *obj;
1692         struct commit *commit;
1693         struct object_id oid;
1694         if (get_oid(name, &oid))
1695                 return NULL;
1696         obj = parse_object(the_repository, &oid);
1697         commit = (struct commit *)peel_to_type(name, 0, obj, OBJ_COMMIT);
1698         if (commit && !merge_remote_util(commit))
1699                 set_merge_remote_desc(commit, name, obj);
1700         return commit;
1701 }
1702
1703 /*
1704  * Append a commit to the end of the commit_list.
1705  *
1706  * next starts by pointing to the variable that holds the head of an
1707  * empty commit_list, and is updated to point to the "next" field of
1708  * the last item on the list as new commits are appended.
1709  *
1710  * Usage example:
1711  *
1712  *     struct commit_list *list;
1713  *     struct commit_list **next = &list;
1714  *
1715  *     next = commit_list_append(c1, next);
1716  *     next = commit_list_append(c2, next);
1717  *     assert(commit_list_count(list) == 2);
1718  *     return list;
1719  */
1720 struct commit_list **commit_list_append(struct commit *commit,
1721                                         struct commit_list **next)
1722 {
1723         struct commit_list *new_commit = xmalloc(sizeof(struct commit_list));
1724         new_commit->item = commit;
1725         *next = new_commit;
1726         new_commit->next = NULL;
1727         return &new_commit->next;
1728 }
1729
1730 const char *find_commit_header(const char *msg, const char *key, size_t *out_len)
1731 {
1732         int key_len = strlen(key);
1733         const char *line = msg;
1734
1735         while (line) {
1736                 const char *eol = strchrnul(line, '\n');
1737
1738                 if (line == eol)
1739                         return NULL;
1740
1741                 if (eol - line > key_len &&
1742                     !strncmp(line, key, key_len) &&
1743                     line[key_len] == ' ') {
1744                         *out_len = eol - line - key_len - 1;
1745                         return line + key_len + 1;
1746                 }
1747                 line = *eol ? eol + 1 : NULL;
1748         }
1749         return NULL;
1750 }
1751
1752 /*
1753  * Inspect the given string and determine the true "end" of the log message, in
1754  * order to find where to put a new Signed-off-by: line.  Ignored are
1755  * trailing comment lines and blank lines.  To support "git commit -s
1756  * --amend" on an existing commit, we also ignore "Conflicts:".  To
1757  * support "git commit -v", we truncate at cut lines.
1758  *
1759  * Returns the number of bytes from the tail to ignore, to be fed as
1760  * the second parameter to append_signoff().
1761  */
1762 int ignore_non_trailer(const char *buf, size_t len)
1763 {
1764         int boc = 0;
1765         int bol = 0;
1766         int in_old_conflicts_block = 0;
1767         size_t cutoff = wt_status_locate_end(buf, len);
1768
1769         while (bol < cutoff) {
1770                 const char *next_line = memchr(buf + bol, '\n', len - bol);
1771
1772                 if (!next_line)
1773                         next_line = buf + len;
1774                 else
1775                         next_line++;
1776
1777                 if (buf[bol] == comment_line_char || buf[bol] == '\n') {
1778                         /* is this the first of the run of comments? */
1779                         if (!boc)
1780                                 boc = bol;
1781                         /* otherwise, it is just continuing */
1782                 } else if (starts_with(buf + bol, "Conflicts:\n")) {
1783                         in_old_conflicts_block = 1;
1784                         if (!boc)
1785                                 boc = bol;
1786                 } else if (in_old_conflicts_block && buf[bol] == '\t') {
1787                         ; /* a pathname in the conflicts block */
1788                 } else if (boc) {
1789                         /* the previous was not trailing comment */
1790                         boc = 0;
1791                         in_old_conflicts_block = 0;
1792                 }
1793                 bol = next_line - buf;
1794         }
1795         return boc ? len - boc : len - cutoff;
1796 }