fsck.[ch]: move FOREACH_FSCK_MSG_ID & fsck_msg_id from *.c to *.h
[git] / fsck.c
1 #include "cache.h"
2 #include "object-store.h"
3 #include "repository.h"
4 #include "object.h"
5 #include "blob.h"
6 #include "tree.h"
7 #include "tree-walk.h"
8 #include "commit.h"
9 #include "tag.h"
10 #include "fsck.h"
11 #include "refs.h"
12 #include "url.h"
13 #include "utf8.h"
14 #include "decorate.h"
15 #include "oidset.h"
16 #include "packfile.h"
17 #include "submodule-config.h"
18 #include "config.h"
19 #include "credential.h"
20 #include "help.h"
21
22 static struct oidset gitmodules_found = OIDSET_INIT;
23 static struct oidset gitmodules_done = OIDSET_INIT;
24
25 #define STR(x) #x
26 #define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
27 static struct {
28         const char *id_string;
29         const char *downcased;
30         const char *camelcased;
31         enum fsck_msg_type msg_type;
32 } msg_id_info[FSCK_MSG_MAX + 1] = {
33         FOREACH_FSCK_MSG_ID(MSG_ID)
34         { NULL, NULL, NULL, -1 }
35 };
36 #undef MSG_ID
37 #undef STR
38
39 static void prepare_msg_ids(void)
40 {
41         int i;
42
43         if (msg_id_info[0].downcased)
44                 return;
45
46         /* convert id_string to lower case, without underscores. */
47         for (i = 0; i < FSCK_MSG_MAX; i++) {
48                 const char *p = msg_id_info[i].id_string;
49                 int len = strlen(p);
50                 char *q = xmalloc(len);
51
52                 msg_id_info[i].downcased = q;
53                 while (*p)
54                         if (*p == '_')
55                                 p++;
56                         else
57                                 *(q)++ = tolower(*(p)++);
58                 *q = '\0';
59
60                 p = msg_id_info[i].id_string;
61                 q = xmalloc(len);
62                 msg_id_info[i].camelcased = q;
63                 while (*p) {
64                         if (*p == '_') {
65                                 p++;
66                                 if (*p)
67                                         *q++ = *p++;
68                         } else {
69                                 *q++ = tolower(*p++);
70                         }
71                 }
72                 *q = '\0';
73         }
74 }
75
76 static int parse_msg_id(const char *text)
77 {
78         int i;
79
80         prepare_msg_ids();
81
82         for (i = 0; i < FSCK_MSG_MAX; i++)
83                 if (!strcmp(text, msg_id_info[i].downcased))
84                         return i;
85
86         return -1;
87 }
88
89 void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
90 {
91         int i;
92
93         prepare_msg_ids();
94
95         for (i = 0; i < FSCK_MSG_MAX; i++)
96                 list_config_item(list, prefix, msg_id_info[i].camelcased);
97 }
98
99 static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
100         struct fsck_options *options)
101 {
102         assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
103
104         if (!options->msg_type) {
105                 enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
106
107                 if (options->strict && msg_type == FSCK_WARN)
108                         msg_type = FSCK_ERROR;
109                 return msg_type;
110         }
111
112         return options->msg_type[msg_id];
113 }
114
115 static enum fsck_msg_type parse_msg_type(const char *str)
116 {
117         if (!strcmp(str, "error"))
118                 return FSCK_ERROR;
119         else if (!strcmp(str, "warn"))
120                 return FSCK_WARN;
121         else if (!strcmp(str, "ignore"))
122                 return FSCK_IGNORE;
123         else
124                 die("Unknown fsck message type: '%s'", str);
125 }
126
/*
 * Returns 0 when "msg_id" is not a known (downcased) message id.
 * Otherwise "msg_type" is run through parse_msg_type() -- which die()s
 * on a string it does not recognize -- and 1 is returned.
 */
int is_valid_msg_type(const char *msg_id, const char *msg_type)
{
	int known_id = parse_msg_id(msg_id) >= 0;

	if (known_id)
		parse_msg_type(msg_type);
	return known_id;
}
134
135 void fsck_set_msg_type(struct fsck_options *options,
136                        const char *msg_id_str, const char *msg_type_str)
137 {
138         int msg_id = parse_msg_id(msg_id_str);
139         enum fsck_msg_type msg_type = parse_msg_type(msg_type_str);
140
141         if (msg_id < 0)
142                 die("Unhandled message id: %s", msg_id_str);
143
144         if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)
145                 die("Cannot demote %s to %s", msg_id_str, msg_type_str);
146
147         if (!options->msg_type) {
148                 int i;
149                 enum fsck_msg_type *severity;
150                 ALLOC_ARRAY(severity, FSCK_MSG_MAX);
151                 for (i = 0; i < FSCK_MSG_MAX; i++)
152                         severity[i] = fsck_msg_type(i, options);
153                 options->msg_type = severity;
154         }
155
156         options->msg_type[msg_id] = msg_type;
157 }
158
159 void fsck_set_msg_types(struct fsck_options *options, const char *values)
160 {
161         char *buf = xstrdup(values), *to_free = buf;
162         int done = 0;
163
164         while (!done) {
165                 int len = strcspn(buf, " ,|"), equal;
166
167                 done = !buf[len];
168                 if (!len) {
169                         buf++;
170                         continue;
171                 }
172                 buf[len] = '\0';
173
174                 for (equal = 0;
175                      equal < len && buf[equal] != '=' && buf[equal] != ':';
176                      equal++)
177                         buf[equal] = tolower(buf[equal]);
178                 buf[equal] = '\0';
179
180                 if (!strcmp(buf, "skiplist")) {
181                         if (equal == len)
182                                 die("skiplist requires a path");
183                         oidset_parse_file(&options->skiplist, buf + equal + 1);
184                         buf += len + 1;
185                         continue;
186                 }
187
188                 if (equal == len)
189                         die("Missing '=': '%s'", buf);
190
191                 fsck_set_msg_type(options, buf, buf + equal + 1);
192                 buf += len + 1;
193         }
194         free(to_free);
195 }
196
197 static int object_on_skiplist(struct fsck_options *opts,
198                               const struct object_id *oid)
199 {
200         return opts && oid && oidset_contains(&opts->skiplist, oid);
201 }
202
203 __attribute__((format (printf, 5, 6)))
204 static int report(struct fsck_options *options,
205                   const struct object_id *oid, enum object_type object_type,
206                   enum fsck_msg_id msg_id, const char *fmt, ...)
207 {
208         va_list ap;
209         struct strbuf sb = STRBUF_INIT;
210         enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
211         int result;
212
213         if (msg_type == FSCK_IGNORE)
214                 return 0;
215
216         if (object_on_skiplist(options, oid))
217                 return 0;
218
219         if (msg_type == FSCK_FATAL)
220                 msg_type = FSCK_ERROR;
221         else if (msg_type == FSCK_INFO)
222                 msg_type = FSCK_WARN;
223
224         prepare_msg_ids();
225         strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
226
227         va_start(ap, fmt);
228         strbuf_vaddf(&sb, fmt, ap);
229         result = options->error_func(options, oid, object_type,
230                                      msg_type, sb.buf);
231         strbuf_release(&sb);
232         va_end(ap);
233
234         return result;
235 }
236
237 void fsck_enable_object_names(struct fsck_options *options)
238 {
239         if (!options->object_names)
240                 options->object_names = kh_init_oid_map();
241 }
242
243 const char *fsck_get_object_name(struct fsck_options *options,
244                                  const struct object_id *oid)
245 {
246         khiter_t pos;
247         if (!options->object_names)
248                 return NULL;
249         pos = kh_get_oid_map(options->object_names, *oid);
250         if (pos >= kh_end(options->object_names))
251                 return NULL;
252         return kh_value(options->object_names, pos);
253 }
254
255 void fsck_put_object_name(struct fsck_options *options,
256                           const struct object_id *oid,
257                           const char *fmt, ...)
258 {
259         va_list ap;
260         struct strbuf buf = STRBUF_INIT;
261         khiter_t pos;
262         int hashret;
263
264         if (!options->object_names)
265                 return;
266
267         pos = kh_put_oid_map(options->object_names, *oid, &hashret);
268         if (!hashret)
269                 return;
270         va_start(ap, fmt);
271         strbuf_vaddf(&buf, fmt, ap);
272         kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);
273         va_end(ap);
274 }
275
276 const char *fsck_describe_object(struct fsck_options *options,
277                                  const struct object_id *oid)
278 {
279         static struct strbuf bufs[] = {
280                 STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
281         };
282         static int b = 0;
283         struct strbuf *buf;
284         const char *name = fsck_get_object_name(options, oid);
285
286         buf = bufs + b;
287         b = (b + 1) % ARRAY_SIZE(bufs);
288         strbuf_reset(buf);
289         strbuf_addstr(buf, oid_to_hex(oid));
290         if (name)
291                 strbuf_addf(buf, " (%s)", name);
292
293         return buf->buf;
294 }
295
296 static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
297 {
298         struct tree_desc desc;
299         struct name_entry entry;
300         int res = 0;
301         const char *name;
302
303         if (parse_tree(tree))
304                 return -1;
305
306         name = fsck_get_object_name(options, &tree->object.oid);
307         if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
308                 return -1;
309         while (tree_entry_gently(&desc, &entry)) {
310                 struct object *obj;
311                 int result;
312
313                 if (S_ISGITLINK(entry.mode))
314                         continue;
315
316                 if (S_ISDIR(entry.mode)) {
317                         obj = (struct object *)lookup_tree(the_repository, &entry.oid);
318                         if (name && obj)
319                                 fsck_put_object_name(options, &entry.oid, "%s%s/",
320                                                      name, entry.path);
321                         result = options->walk(obj, OBJ_TREE, data, options);
322                 }
323                 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
324                         obj = (struct object *)lookup_blob(the_repository, &entry.oid);
325                         if (name && obj)
326                                 fsck_put_object_name(options, &entry.oid, "%s%s",
327                                                      name, entry.path);
328                         result = options->walk(obj, OBJ_BLOB, data, options);
329                 }
330                 else {
331                         result = error("in tree %s: entry %s has bad mode %.6o",
332                                        fsck_describe_object(options, &tree->object.oid),
333                                        entry.path, entry.mode);
334                 }
335                 if (result < 0)
336                         return result;
337                 if (!res)
338                         res = result;
339         }
340         return res;
341 }
342
343 static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
344 {
345         int counter = 0, generation = 0, name_prefix_len = 0;
346         struct commit_list *parents;
347         int res;
348         int result;
349         const char *name;
350
351         if (parse_commit(commit))
352                 return -1;
353
354         name = fsck_get_object_name(options, &commit->object.oid);
355         if (name)
356                 fsck_put_object_name(options, get_commit_tree_oid(commit),
357                                      "%s:", name);
358
359         result = options->walk((struct object *)get_commit_tree(commit),
360                                OBJ_TREE, data, options);
361         if (result < 0)
362                 return result;
363         res = result;
364
365         parents = commit->parents;
366         if (name && parents) {
367                 int len = strlen(name), power;
368
369                 if (len && name[len - 1] == '^') {
370                         generation = 1;
371                         name_prefix_len = len - 1;
372                 }
373                 else { /* parse ~<generation> suffix */
374                         for (generation = 0, power = 1;
375                              len && isdigit(name[len - 1]);
376                              power *= 10)
377                                 generation += power * (name[--len] - '0');
378                         if (power > 1 && len && name[len - 1] == '~')
379                                 name_prefix_len = len - 1;
380                         else {
381                                 /* Maybe a non-first parent, e.g. HEAD^2 */
382                                 generation = 0;
383                                 name_prefix_len = len;
384                         }
385                 }
386         }
387
388         while (parents) {
389                 if (name) {
390                         struct object_id *oid = &parents->item->object.oid;
391
392                         if (counter++)
393                                 fsck_put_object_name(options, oid, "%s^%d",
394                                                      name, counter);
395                         else if (generation > 0)
396                                 fsck_put_object_name(options, oid, "%.*s~%d",
397                                                      name_prefix_len, name,
398                                                      generation + 1);
399                         else
400                                 fsck_put_object_name(options, oid, "%s^", name);
401                 }
402                 result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
403                 if (result < 0)
404                         return result;
405                 if (!res)
406                         res = result;
407                 parents = parents->next;
408         }
409         return res;
410 }
411
412 static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
413 {
414         const char *name = fsck_get_object_name(options, &tag->object.oid);
415
416         if (parse_tag(tag))
417                 return -1;
418         if (name)
419                 fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
420         return options->walk(tag->tagged, OBJ_ANY, data, options);
421 }
422
423 int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
424 {
425         if (!obj)
426                 return -1;
427
428         if (obj->type == OBJ_NONE)
429                 parse_object(the_repository, &obj->oid);
430
431         switch (obj->type) {
432         case OBJ_BLOB:
433                 return 0;
434         case OBJ_TREE:
435                 return fsck_walk_tree((struct tree *)obj, data, options);
436         case OBJ_COMMIT:
437                 return fsck_walk_commit((struct commit *)obj, data, options);
438         case OBJ_TAG:
439                 return fsck_walk_tag((struct tag *)obj, data, options);
440         default:
441                 error("Unknown object type for %s",
442                       fsck_describe_object(options, &obj->oid));
443                 return -1;
444         }
445 }
446
/* A growable stack of borrowed string pointers. */
struct name_stack {
	/* storage for the pointers; the strings themselves are not copied */
	const char **names;
	/* number of entries currently on the stack */
	size_t nr;
	/* capacity bookkeeping used with ALLOC_GROW() */
	size_t alloc;
};
451
452 static void name_stack_push(struct name_stack *stack, const char *name)
453 {
454         ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);
455         stack->names[stack->nr++] = name;
456 }
457
458 static const char *name_stack_pop(struct name_stack *stack)
459 {
460         return stack->nr ? stack->names[--stack->nr] : NULL;
461 }
462
463 static void name_stack_clear(struct name_stack *stack)
464 {
465         FREE_AND_NULL(stack->names);
466         stack->nr = stack->alloc = 0;
467 }
468
469 /*
470  * The entries in a tree are ordered in the _path_ order,
471  * which means that a directory entry is ordered by adding
472  * a slash to the end of it.
473  *
474  * So a directory called "a" is ordered _after_ a file
475  * called "a.c", because "a/" sorts after "a.c".
476  */
477 #define TREE_UNORDERED (-1)
478 #define TREE_HAS_DUPS  (-2)
479
/*
 * Returns 1 for any byte strictly between NUL and '/', 0 otherwise.
 */
static int is_less_than_slash(unsigned char c)
{
	if (c == '\0')
		return 0;
	return c < '/';
}
484
485 static int verify_ordered(unsigned mode1, const char *name1,
486                           unsigned mode2, const char *name2,
487                           struct name_stack *candidates)
488 {
489         int len1 = strlen(name1);
490         int len2 = strlen(name2);
491         int len = len1 < len2 ? len1 : len2;
492         unsigned char c1, c2;
493         int cmp;
494
495         cmp = memcmp(name1, name2, len);
496         if (cmp < 0)
497                 return 0;
498         if (cmp > 0)
499                 return TREE_UNORDERED;
500
501         /*
502          * Ok, the first <len> characters are the same.
503          * Now we need to order the next one, but turn
504          * a '\0' into a '/' for a directory entry.
505          */
506         c1 = name1[len];
507         c2 = name2[len];
508         if (!c1 && !c2)
509                 /*
510                  * git-write-tree used to write out a nonsense tree that has
511                  * entries with the same name, one blob and one tree.  Make
512                  * sure we do not have duplicate entries.
513                  */
514                 return TREE_HAS_DUPS;
515         if (!c1 && S_ISDIR(mode1))
516                 c1 = '/';
517         if (!c2 && S_ISDIR(mode2))
518                 c2 = '/';
519
520         /*
521          * There can be non-consecutive duplicates due to the implicitly
522          * added slash, e.g.:
523          *
524          *   foo
525          *   foo.bar
526          *   foo.bar.baz
527          *   foo.bar/
528          *   foo/
529          *
530          * Record non-directory candidates (like "foo" and "foo.bar" in
531          * the example) on a stack and check directory candidates (like
532          * foo/" and "foo.bar/") against that stack.
533          */
534         if (!c1 && is_less_than_slash(c2)) {
535                 name_stack_push(candidates, name1);
536         } else if (c2 == '/' && is_less_than_slash(c1)) {
537                 for (;;) {
538                         const char *p;
539                         const char *f_name = name_stack_pop(candidates);
540
541                         if (!f_name)
542                                 break;
543                         if (!skip_prefix(name2, f_name, &p))
544                                 continue;
545                         if (!*p)
546                                 return TREE_HAS_DUPS;
547                         if (is_less_than_slash(*p)) {
548                                 name_stack_push(candidates, f_name);
549                                 break;
550                         }
551                 }
552         }
553
554         return c1 < c2 ? 0 : TREE_UNORDERED;
555 }
556
557 static int fsck_tree(const struct object_id *oid,
558                      const char *buffer, unsigned long size,
559                      struct fsck_options *options)
560 {
561         int retval = 0;
562         int has_null_sha1 = 0;
563         int has_full_path = 0;
564         int has_empty_name = 0;
565         int has_dot = 0;
566         int has_dotdot = 0;
567         int has_dotgit = 0;
568         int has_zero_pad = 0;
569         int has_bad_modes = 0;
570         int has_dup_entries = 0;
571         int not_properly_sorted = 0;
572         struct tree_desc desc;
573         unsigned o_mode;
574         const char *o_name;
575         struct name_stack df_dup_candidates = { NULL };
576
577         if (init_tree_desc_gently(&desc, buffer, size)) {
578                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
579                 return retval;
580         }
581
582         o_mode = 0;
583         o_name = NULL;
584
585         while (desc.size) {
586                 unsigned short mode;
587                 const char *name, *backslash;
588                 const struct object_id *oid;
589
590                 oid = tree_entry_extract(&desc, &name, &mode);
591
592                 has_null_sha1 |= is_null_oid(oid);
593                 has_full_path |= !!strchr(name, '/');
594                 has_empty_name |= !*name;
595                 has_dot |= !strcmp(name, ".");
596                 has_dotdot |= !strcmp(name, "..");
597                 has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
598                 has_zero_pad |= *(char *)desc.buffer == '0';
599
600                 if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
601                         if (!S_ISLNK(mode))
602                                 oidset_insert(&gitmodules_found, oid);
603                         else
604                                 retval += report(options,
605                                                  oid, OBJ_TREE,
606                                                  FSCK_MSG_GITMODULES_SYMLINK,
607                                                  ".gitmodules is a symbolic link");
608                 }
609
610                 if ((backslash = strchr(name, '\\'))) {
611                         while (backslash) {
612                                 backslash++;
613                                 has_dotgit |= is_ntfs_dotgit(backslash);
614                                 if (is_ntfs_dotgitmodules(backslash)) {
615                                         if (!S_ISLNK(mode))
616                                                 oidset_insert(&gitmodules_found, oid);
617                                         else
618                                                 retval += report(options, oid, OBJ_TREE,
619                                                                  FSCK_MSG_GITMODULES_SYMLINK,
620                                                                  ".gitmodules is a symbolic link");
621                                 }
622                                 backslash = strchr(backslash, '\\');
623                         }
624                 }
625
626                 if (update_tree_entry_gently(&desc)) {
627                         retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
628                         break;
629                 }
630
631                 switch (mode) {
632                 /*
633                  * Standard modes..
634                  */
635                 case S_IFREG | 0755:
636                 case S_IFREG | 0644:
637                 case S_IFLNK:
638                 case S_IFDIR:
639                 case S_IFGITLINK:
640                         break;
641                 /*
642                  * This is nonstandard, but we had a few of these
643                  * early on when we honored the full set of mode
644                  * bits..
645                  */
646                 case S_IFREG | 0664:
647                         if (!options->strict)
648                                 break;
649                         /* fallthrough */
650                 default:
651                         has_bad_modes = 1;
652                 }
653
654                 if (o_name) {
655                         switch (verify_ordered(o_mode, o_name, mode, name,
656                                                &df_dup_candidates)) {
657                         case TREE_UNORDERED:
658                                 not_properly_sorted = 1;
659                                 break;
660                         case TREE_HAS_DUPS:
661                                 has_dup_entries = 1;
662                                 break;
663                         default:
664                                 break;
665                         }
666                 }
667
668                 o_mode = mode;
669                 o_name = name;
670         }
671
672         name_stack_clear(&df_dup_candidates);
673
674         if (has_null_sha1)
675                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
676         if (has_full_path)
677                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_FULL_PATHNAME, "contains full pathnames");
678         if (has_empty_name)
679                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_EMPTY_NAME, "contains empty pathname");
680         if (has_dot)
681                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOT, "contains '.'");
682         if (has_dotdot)
683                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOTDOT, "contains '..'");
684         if (has_dotgit)
685                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOTGIT, "contains '.git'");
686         if (has_zero_pad)
687                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_ZERO_PADDED_FILEMODE, "contains zero-padded file modes");
688         if (has_bad_modes)
689                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_FILEMODE, "contains bad file modes");
690         if (has_dup_entries)
691                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_DUPLICATE_ENTRIES, "contains duplicate file entries");
692         if (not_properly_sorted)
693                 retval += report(options, oid, OBJ_TREE, FSCK_MSG_TREE_NOT_SORTED, "not properly sorted");
694         return retval;
695 }
696
697 static int verify_headers(const void *data, unsigned long size,
698                           const struct object_id *oid, enum object_type type,
699                           struct fsck_options *options)
700 {
701         const char *buffer = (const char *)data;
702         unsigned long i;
703
704         for (i = 0; i < size; i++) {
705                 switch (buffer[i]) {
706                 case '\0':
707                         return report(options, oid, type,
708                                 FSCK_MSG_NUL_IN_HEADER,
709                                 "unterminated header: NUL at offset %ld", i);
710                 case '\n':
711                         if (i + 1 < size && buffer[i + 1] == '\n')
712                                 return 0;
713                 }
714         }
715
716         /*
717          * We did not find double-LF that separates the header
718          * and the body.  Not having a body is not a crime but
719          * we do want to see the terminating LF for the last header
720          * line.
721          */
722         if (size && buffer[size - 1] == '\n')
723                 return 0;
724
725         return report(options, oid, type,
726                 FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
727 }
728
729 static int fsck_ident(const char **ident,
730                       const struct object_id *oid, enum object_type type,
731                       struct fsck_options *options)
732 {
733         const char *p = *ident;
734         char *end;
735
736         *ident = strchrnul(*ident, '\n');
737         if (**ident == '\n')
738                 (*ident)++;
739
740         if (*p == '<')
741                 return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
742         p += strcspn(p, "<>\n");
743         if (*p == '>')
744                 return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
745         if (*p != '<')
746                 return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
747         if (p[-1] != ' ')
748                 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
749         p++;
750         p += strcspn(p, "<>\n");
751         if (*p != '>')
752                 return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
753         p++;
754         if (*p != ' ')
755                 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
756         p++;
757         if (*p == '0' && p[1] != ' ')
758                 return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
759         if (date_overflows(parse_timestamp(p, &end, 10)))
760                 return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
761         if ((end == p || *end != ' '))
762                 return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
763         p = end + 1;
764         if ((*p != '+' && *p != '-') ||
765             !isdigit(p[1]) ||
766             !isdigit(p[2]) ||
767             !isdigit(p[3]) ||
768             !isdigit(p[4]) ||
769             (p[5] != '\n'))
770                 return report(options, oid, type, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
771         p += 6;
772         return 0;
773 }
774
775 static int fsck_commit(const struct object_id *oid,
776                        const char *buffer, unsigned long size,
777                        struct fsck_options *options)
778 {
779         struct object_id tree_oid, parent_oid;
780         unsigned author_count;
781         int err;
782         const char *buffer_begin = buffer;
783         const char *p;
784
785         if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
786                 return -1;
787
788         if (!skip_prefix(buffer, "tree ", &buffer))
789                 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
790         if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
791                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
792                 if (err)
793                         return err;
794         }
795         buffer = p + 1;
796         while (skip_prefix(buffer, "parent ", &buffer)) {
797                 if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
798                         err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
799                         if (err)
800                                 return err;
801                 }
802                 buffer = p + 1;
803         }
804         author_count = 0;
805         while (skip_prefix(buffer, "author ", &buffer)) {
806                 author_count++;
807                 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
808                 if (err)
809                         return err;
810         }
811         if (author_count < 1)
812                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
813         else if (author_count > 1)
814                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
815         if (err)
816                 return err;
817         if (!skip_prefix(buffer, "committer ", &buffer))
818                 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
819         err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
820         if (err)
821                 return err;
822         if (memchr(buffer_begin, '\0', size)) {
823                 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
824                              "NUL byte in the commit object body");
825                 if (err)
826                         return err;
827         }
828         return 0;
829 }
830
831 static int fsck_tag(const struct object_id *oid, const char *buffer,
832                     unsigned long size, struct fsck_options *options)
833 {
834         struct object_id tagged_oid;
835         int tagged_type;
836         return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,
837                                    &tagged_type);
838 }
839
840 int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
841                         unsigned long size, struct fsck_options *options,
842                         struct object_id *tagged_oid,
843                         int *tagged_type)
844 {
845         int ret = 0;
846         char *eol;
847         struct strbuf sb = STRBUF_INIT;
848         const char *p;
849
850         ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
851         if (ret)
852                 goto done;
853
854         if (!skip_prefix(buffer, "object ", &buffer)) {
855                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
856                 goto done;
857         }
858         if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {
859                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
860                 if (ret)
861                         goto done;
862         }
863         buffer = p + 1;
864
865         if (!skip_prefix(buffer, "type ", &buffer)) {
866                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
867                 goto done;
868         }
869         eol = strchr(buffer, '\n');
870         if (!eol) {
871                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
872                 goto done;
873         }
874         *tagged_type = type_from_string_gently(buffer, eol - buffer, 1);
875         if (*tagged_type < 0)
876                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
877         if (ret)
878                 goto done;
879         buffer = eol + 1;
880
881         if (!skip_prefix(buffer, "tag ", &buffer)) {
882                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
883                 goto done;
884         }
885         eol = strchr(buffer, '\n');
886         if (!eol) {
887                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
888                 goto done;
889         }
890         strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
891         if (check_refname_format(sb.buf, 0)) {
892                 ret = report(options, oid, OBJ_TAG,
893                              FSCK_MSG_BAD_TAG_NAME,
894                              "invalid 'tag' name: %.*s",
895                              (int)(eol - buffer), buffer);
896                 if (ret)
897                         goto done;
898         }
899         buffer = eol + 1;
900
901         if (!skip_prefix(buffer, "tagger ", &buffer)) {
902                 /* early tags do not contain 'tagger' lines; warn only */
903                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
904                 if (ret)
905                         goto done;
906         }
907         else
908                 ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
909         if (!*buffer)
910                 goto done;
911
912         if (!starts_with(buffer, "\n")) {
913                 /*
914                  * The verify_headers() check will allow
915                  * e.g. "[...]tagger <tagger>\nsome
916                  * garbage\n\nmessage" to pass, thinking "some
917                  * garbage" could be a custom header. E.g. "mktag"
918                  * doesn't want any unknown headers.
919                  */
920                 ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");
921                 if (ret)
922                         goto done;
923         }
924
925 done:
926         strbuf_release(&sb);
927         return ret;
928 }
929
/*
 * Report whether str begins with "./" or ".\".
 *
 * This mirrors the helper of the same name in
 * builtin/submodule--helper.c but does not rely on the
 * platform-dependent is_dir_sep: a backslash is a directory separator on
 * Windows only, and a submodule URL has to be recognized as relative to
 * the current directory no matter which platform runs the check.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int starts_with_dot_slash(const char *str)
{
	if (str[0] != '.')
		return 0;
	return str[1] == '/' || str[1] == '\\';
}
942
/*
 * Report whether str begins with "../" or "..\".
 *
 * As with starts_with_dot_slash, this is a variant of the
 * submodule--helper function of the same name that also treats a
 * backslash as a directory separator on non-Windows platforms.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int starts_with_dot_dot_slash(const char *str)
{
	if (str[0] != '.')
		return 0;
	/* The rest must itself look like "./" or ".\". */
	return starts_with_dot_slash(str + 1);
}
952
/*
 * A submodule URL counts as relative when it starts with a "./" or "../"
 * component (with either kind of slash).
 */
static int submodule_url_is_relative(const char *url)
{
	if (starts_with_dot_slash(url))
		return 1;
	return starts_with_dot_dot_slash(url);
}
957
/*
 * Count how many directory components a relative submodule URL removes
 * from the remote URL it gets resolved against, i.e. the number of "../"
 * components at the start of url.
 *
 * Leading "./" components are skipped without being counted.
 *
 * Returns that count and stores in *out a pointer to the first character
 * of url that follows all leading "./" and "../" components.
 */
static int count_leading_dotdots(const char *url, const char **out)
{
	int dotdots = 0;

	for (;;) {
		if (starts_with_dot_dot_slash(url)) {
			url += strlen("../");
			dotdots++;
		} else if (starts_with_dot_slash(url)) {
			url += strlen("./");
		} else {
			break;
		}
	}

	*out = url;
	return dotdots;
}
/*
 * Decide whether the transport for url is implemented by
 * git-remote-curl.
 *
 * Returns 1 if so, storing in *out the URL that git-remote-curl would be
 * given; returns 0 and does not touch *out otherwise.
 *
 * Examples:
 *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
 *   https://example.com/repo.git -> 1, https://example.com/repo.git
 *   git://example.com/repo.git -> 0
 *
 * The purpose is to detect previously exploitable bugs that needed a
 * submodule URL to reach git-remote-curl.
 */
static int url_to_curl_url(const char *url, const char **out)
{
	/* "<helper>::<address>" forms: the helper prefix is stripped. */
	static const char *const helper_prefixes[] = {
		"http::", "https::", "ftp::", "ftps::", NULL
	};
	/* Plain URLs: handed over unchanged. */
	static const char *const plain_schemes[] = {
		"http://", "https://", "ftp://", "ftps://", NULL
	};
	const char *const *it;

	/*
	 * We don't need to check for case-aliases, "http.exe", and so
	 * on because in the default configuration, is_transport_allowed
	 * prevents URLs with those schemes from being cloned
	 * automatically.
	 */
	for (it = helper_prefixes; *it; it++) {
		if (skip_prefix(url, *it, out))
			return 1;
	}

	for (it = plain_schemes; *it; it++) {
		if (starts_with(url, *it)) {
			*out = url;
			return 1;
		}
	}

	return 0;
}
1024
1025 static int check_submodule_url(const char *url)
1026 {
1027         const char *curl_url;
1028
1029         if (looks_like_command_line_option(url))
1030                 return -1;
1031
1032         if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
1033                 char *decoded;
1034                 const char *next;
1035                 int has_nl;
1036
1037                 /*
1038                  * This could be appended to an http URL and url-decoded;
1039                  * check for malicious characters.
1040                  */
1041                 decoded = url_decode(url);
1042                 has_nl = !!strchr(decoded, '\n');
1043
1044                 free(decoded);
1045                 if (has_nl)
1046                         return -1;
1047
1048                 /*
1049                  * URLs which escape their root via "../" can overwrite
1050                  * the host field and previous components, resolving to
1051                  * URLs like https::example.com/submodule.git and
1052                  * https:///example.com/submodule.git that were
1053                  * susceptible to CVE-2020-11008.
1054                  */
1055                 if (count_leading_dotdots(url, &next) > 0 &&
1056                     (*next == ':' || *next == '/'))
1057                         return -1;
1058         }
1059
1060         else if (url_to_curl_url(url, &curl_url)) {
1061                 struct credential c = CREDENTIAL_INIT;
1062                 int ret = 0;
1063                 if (credential_from_url_gently(&c, curl_url, 1) ||
1064                     !*c.host)
1065                         ret = -1;
1066                 credential_clear(&c);
1067                 return ret;
1068         }
1069
1070         return 0;
1071 }
1072
/*
 * State handed by fsck_blob() to fsck_gitmodules_fn() while a
 * .gitmodules blob is parsed as a config file.
 */
struct fsck_gitmodules_data {
	/* id of the blob being checked; passed on to report() */
	const struct object_id *oid;
	/* fsck options passed on to report() */
	struct fsck_options *options;
	/* bitwise OR of every report() result collected so far */
	int ret;
};
1078
1079 static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
1080 {
1081         struct fsck_gitmodules_data *data = vdata;
1082         const char *subsection, *key;
1083         size_t subsection_len;
1084         char *name;
1085
1086         if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
1087             !subsection)
1088                 return 0;
1089
1090         name = xmemdupz(subsection, subsection_len);
1091         if (check_submodule_name(name) < 0)
1092                 data->ret |= report(data->options,
1093                                     data->oid, OBJ_BLOB,
1094                                     FSCK_MSG_GITMODULES_NAME,
1095                                     "disallowed submodule name: %s",
1096                                     name);
1097         if (!strcmp(key, "url") && value &&
1098             check_submodule_url(value) < 0)
1099                 data->ret |= report(data->options,
1100                                     data->oid, OBJ_BLOB,
1101                                     FSCK_MSG_GITMODULES_URL,
1102                                     "disallowed submodule url: %s",
1103                                     value);
1104         if (!strcmp(key, "path") && value &&
1105             looks_like_command_line_option(value))
1106                 data->ret |= report(data->options,
1107                                     data->oid, OBJ_BLOB,
1108                                     FSCK_MSG_GITMODULES_PATH,
1109                                     "disallowed submodule path: %s",
1110                                     value);
1111         if (!strcmp(key, "update") && value &&
1112             parse_submodule_update_type(value) == SM_UPDATE_COMMAND)
1113                 data->ret |= report(data->options, data->oid, OBJ_BLOB,
1114                                     FSCK_MSG_GITMODULES_UPDATE,
1115                                     "disallowed submodule update setting: %s",
1116                                     value);
1117         free(name);
1118
1119         return 0;
1120 }
1121
1122 static int fsck_blob(const struct object_id *oid, const char *buf,
1123                      unsigned long size, struct fsck_options *options)
1124 {
1125         struct fsck_gitmodules_data data;
1126         struct config_options config_opts = { 0 };
1127
1128         if (!oidset_contains(&gitmodules_found, oid))
1129                 return 0;
1130         oidset_insert(&gitmodules_done, oid);
1131
1132         if (object_on_skiplist(options, oid))
1133                 return 0;
1134
1135         if (!buf) {
1136                 /*
1137                  * A missing buffer here is a sign that the caller found the
1138                  * blob too gigantic to load into memory. Let's just consider
1139                  * that an error.
1140                  */
1141                 return report(options, oid, OBJ_BLOB,
1142                               FSCK_MSG_GITMODULES_LARGE,
1143                               ".gitmodules too large to parse");
1144         }
1145
1146         data.oid = oid;
1147         data.options = options;
1148         data.ret = 0;
1149         config_opts.error_action = CONFIG_ERROR_SILENT;
1150         if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1151                                 ".gitmodules", buf, size, &data, &config_opts))
1152                 data.ret |= report(options, oid, OBJ_BLOB,
1153                                    FSCK_MSG_GITMODULES_PARSE,
1154                                    "could not parse gitmodules blob");
1155
1156         return data.ret;
1157 }
1158
1159 int fsck_object(struct object *obj, void *data, unsigned long size,
1160         struct fsck_options *options)
1161 {
1162         if (!obj)
1163                 return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
1164
1165         if (obj->type == OBJ_BLOB)
1166                 return fsck_blob(&obj->oid, data, size, options);
1167         if (obj->type == OBJ_TREE)
1168                 return fsck_tree(&obj->oid, data, size, options);
1169         if (obj->type == OBJ_COMMIT)
1170                 return fsck_commit(&obj->oid, data, size, options);
1171         if (obj->type == OBJ_TAG)
1172                 return fsck_tag(&obj->oid, data, size, options);
1173
1174         return report(options, &obj->oid, obj->type,
1175                       FSCK_MSG_UNKNOWN_TYPE,
1176                       "unknown type '%d' (internal fsck error)",
1177                       obj->type);
1178 }
1179
1180 int fsck_error_function(struct fsck_options *o,
1181                         const struct object_id *oid,
1182                         enum object_type object_type,
1183                         enum fsck_msg_type msg_type, const char *message)
1184 {
1185         if (msg_type == FSCK_WARN) {
1186                 warning("object %s: %s", fsck_describe_object(o, oid), message);
1187                 return 0;
1188         }
1189         error("object %s: %s", fsck_describe_object(o, oid), message);
1190         return 1;
1191 }
1192
1193 void register_found_gitmodules(const struct object_id *oid)
1194 {
1195         oidset_insert(&gitmodules_found, oid);
1196 }
1197
1198 int fsck_finish(struct fsck_options *options)
1199 {
1200         int ret = 0;
1201         struct oidset_iter iter;
1202         const struct object_id *oid;
1203
1204         oidset_iter_init(&gitmodules_found, &iter);
1205         while ((oid = oidset_iter_next(&iter))) {
1206                 enum object_type type;
1207                 unsigned long size;
1208                 char *buf;
1209
1210                 if (oidset_contains(&gitmodules_done, oid))
1211                         continue;
1212
1213                 buf = read_object_file(oid, &type, &size);
1214                 if (!buf) {
1215                         if (is_promisor_object(oid))
1216                                 continue;
1217                         ret |= report(options,
1218                                       oid, OBJ_BLOB,
1219                                       FSCK_MSG_GITMODULES_MISSING,
1220                                       "unable to read .gitmodules blob");
1221                         continue;
1222                 }
1223
1224                 if (type == OBJ_BLOB)
1225                         ret |= fsck_blob(oid, buf, size, options);
1226                 else
1227                         ret |= report(options,
1228                                       oid, type,
1229                                       FSCK_MSG_GITMODULES_BLOB,
1230                                       "non-blob found at .gitmodules");
1231                 free(buf);
1232         }
1233
1234
1235         oidset_clear(&gitmodules_found);
1236         oidset_clear(&gitmodules_done);
1237         return ret;
1238 }
1239
1240 int git_fsck_config(const char *var, const char *value, void *cb)
1241 {
1242         struct fsck_options *options = cb;
1243         if (strcmp(var, "fsck.skiplist") == 0) {
1244                 const char *path;
1245                 struct strbuf sb = STRBUF_INIT;
1246
1247                 if (git_config_pathname(&path, var, value))
1248                         return 1;
1249                 strbuf_addf(&sb, "skiplist=%s", path);
1250                 free((char *)path);
1251                 fsck_set_msg_types(options, sb.buf);
1252                 strbuf_release(&sb);
1253                 return 0;
1254         }
1255
1256         if (skip_prefix(var, "fsck.", &var)) {
1257                 fsck_set_msg_type(options, var, value);
1258                 return 0;
1259         }
1260
1261         return git_default_config(var, value, cb);
1262 }