fsck: introduce identifiers for fsck messages
[git] / fsck.c
1 #include "cache.h"
2 #include "object.h"
3 #include "blob.h"
4 #include "tree.h"
5 #include "tree-walk.h"
6 #include "commit.h"
7 #include "tag.h"
8 #include "fsck.h"
9 #include "refs.h"
10 #include "utf8.h"
11
/*
 * Master list of all fsck messages.
 *
 * FOREACH_MSG_ID(X) expands to one X(id, severity) invocation per message,
 * in the order given below.  Users define a helper macro, pass it in as X,
 * and #undef it afterwards; that keeps every table derived from this list
 * in the same order as enum fsck_msg_id.
 */
#define FOREACH_MSG_ID(X) \
	/* errors */ \
	X(BAD_DATE, ERROR) \
	X(BAD_DATE_OVERFLOW, ERROR) \
	X(BAD_EMAIL, ERROR) \
	X(BAD_NAME, ERROR) \
	X(BAD_OBJECT_SHA1, ERROR) \
	X(BAD_PARENT_SHA1, ERROR) \
	X(BAD_TAG_OBJECT, ERROR) \
	X(BAD_TIMEZONE, ERROR) \
	X(BAD_TREE, ERROR) \
	X(BAD_TREE_SHA1, ERROR) \
	X(BAD_TYPE, ERROR) \
	X(DUPLICATE_ENTRIES, ERROR) \
	X(MISSING_AUTHOR, ERROR) \
	X(MISSING_COMMITTER, ERROR) \
	X(MISSING_EMAIL, ERROR) \
	X(MISSING_GRAFT, ERROR) \
	X(MISSING_NAME_BEFORE_EMAIL, ERROR) \
	X(MISSING_OBJECT, ERROR) \
	X(MISSING_PARENT, ERROR) \
	X(MISSING_SPACE_BEFORE_DATE, ERROR) \
	X(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
	X(MISSING_TAG, ERROR) \
	X(MISSING_TAG_ENTRY, ERROR) \
	X(MISSING_TAG_OBJECT, ERROR) \
	X(MISSING_TREE, ERROR) \
	X(MISSING_TYPE, ERROR) \
	X(MISSING_TYPE_ENTRY, ERROR) \
	X(NUL_IN_HEADER, ERROR) \
	X(TAG_OBJECT_NOT_TAG, ERROR) \
	X(TREE_NOT_SORTED, ERROR) \
	X(UNKNOWN_TYPE, ERROR) \
	X(UNTERMINATED_HEADER, ERROR) \
	X(ZERO_PADDED_DATE, ERROR) \
	/* warnings */ \
	X(BAD_FILEMODE, WARN) \
	X(BAD_TAG_NAME, WARN) \
	X(EMPTY_NAME, WARN) \
	X(FULL_PATHNAME, WARN) \
	X(HAS_DOT, WARN) \
	X(HAS_DOTDOT, WARN) \
	X(HAS_DOTGIT, WARN) \
	X(MISSING_TAGGER_ENTRY, WARN) \
	X(NULL_SHA1, WARN) \
	X(ZERO_PADDED_FILEMODE, WARN)

/*
 * One FSCK_MSG_<id> enumerator per list entry, numbered from 0 in list
 * order.  FSCK_MSG_MAX comes last and therefore equals the number of
 * messages.
 */
#define MSG_ID(id, severity) FSCK_MSG_##id,
enum fsck_msg_id {
	FOREACH_MSG_ID(MSG_ID)
	FSCK_MSG_MAX
};
#undef MSG_ID
65
66 #define MSG_ID(id, msg_type) { FSCK_##msg_type },
67 static struct {
68         int msg_type;
69 } msg_id_info[FSCK_MSG_MAX + 1] = {
70         FOREACH_MSG_ID(MSG_ID)
71         { -1 }
72 };
73 #undef MSG_ID
74
75 static int fsck_msg_type(enum fsck_msg_id msg_id,
76         struct fsck_options *options)
77 {
78         int msg_type;
79
80         msg_type = msg_id_info[msg_id].msg_type;
81         if (options->strict && msg_type == FSCK_WARN)
82                 msg_type = FSCK_ERROR;
83
84         return msg_type;
85 }
86
87 __attribute__((format (printf, 4, 5)))
88 static int report(struct fsck_options *options, struct object *object,
89         enum fsck_msg_id id, const char *fmt, ...)
90 {
91         va_list ap;
92         struct strbuf sb = STRBUF_INIT;
93         int msg_type = fsck_msg_type(id, options), result;
94
95         va_start(ap, fmt);
96         strbuf_vaddf(&sb, fmt, ap);
97         result = options->error_func(object, msg_type, sb.buf);
98         strbuf_release(&sb);
99         va_end(ap);
100
101         return result;
102 }
103
104 static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
105 {
106         struct tree_desc desc;
107         struct name_entry entry;
108         int res = 0;
109
110         if (parse_tree(tree))
111                 return -1;
112
113         init_tree_desc(&desc, tree->buffer, tree->size);
114         while (tree_entry(&desc, &entry)) {
115                 int result;
116
117                 if (S_ISGITLINK(entry.mode))
118                         continue;
119                 if (S_ISDIR(entry.mode))
120                         result = options->walk(&lookup_tree(entry.sha1)->object, OBJ_TREE, data, options);
121                 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode))
122                         result = options->walk(&lookup_blob(entry.sha1)->object, OBJ_BLOB, data, options);
123                 else {
124                         result = error("in tree %s: entry %s has bad mode %.6o",
125                                         sha1_to_hex(tree->object.sha1), entry.path, entry.mode);
126                 }
127                 if (result < 0)
128                         return result;
129                 if (!res)
130                         res = result;
131         }
132         return res;
133 }
134
135 static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
136 {
137         struct commit_list *parents;
138         int res;
139         int result;
140
141         if (parse_commit(commit))
142                 return -1;
143
144         result = options->walk((struct object *)commit->tree, OBJ_TREE, data, options);
145         if (result < 0)
146                 return result;
147         res = result;
148
149         parents = commit->parents;
150         while (parents) {
151                 result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
152                 if (result < 0)
153                         return result;
154                 if (!res)
155                         res = result;
156                 parents = parents->next;
157         }
158         return res;
159 }
160
161 static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
162 {
163         if (parse_tag(tag))
164                 return -1;
165         return options->walk(tag->tagged, OBJ_ANY, data, options);
166 }
167
168 int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
169 {
170         if (!obj)
171                 return -1;
172         switch (obj->type) {
173         case OBJ_BLOB:
174                 return 0;
175         case OBJ_TREE:
176                 return fsck_walk_tree((struct tree *)obj, data, options);
177         case OBJ_COMMIT:
178                 return fsck_walk_commit((struct commit *)obj, data, options);
179         case OBJ_TAG:
180                 return fsck_walk_tag((struct tag *)obj, data, options);
181         default:
182                 error("Unknown object type for %s", sha1_to_hex(obj->sha1));
183                 return -1;
184         }
185 }
186
/*
 * Tree entries are sorted in _path_ order: a directory entry compares as
 * if its name had a trailing slash.
 *
 * Consequently a directory named "a" sorts _after_ a file named "a.c",
 * since "a/" is greater than "a.c".
 *
 * verify_ordered() checks one adjacent pair (entry 1 followed by entry 2)
 * and returns 0 when the pair is correctly ordered, TREE_UNORDERED when it
 * is not, and TREE_HAS_DUPS when both names are identical.
 */
#define TREE_UNORDERED (-1)
#define TREE_HAS_DUPS  (-2)

static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
{
	int len1 = strlen(name1);
	int len2 = strlen(name2);
	int common = (len2 <= len1) ? len2 : len1;
	int cmp = memcmp(name1, name2, common);
	unsigned char next1, next2;

	if (cmp)
		return cmp < 0 ? 0 : TREE_UNORDERED;

	/*
	 * The shared prefix is identical, so the byte following it decides.
	 * For a directory, the terminating NUL counts as '/'.
	 */
	next1 = name1[common];
	next2 = name2[common];

	/*
	 * Same name twice: git-write-tree once produced nonsense trees with
	 * a blob and a tree of the same name, so flag duplicates regardless
	 * of the entries' modes.
	 */
	if (!next1 && !next2)
		return TREE_HAS_DUPS;

	if (!next1 && S_ISDIR(mode1))
		next1 = '/';
	if (!next2 && S_ISDIR(mode2))
		next2 = '/';

	if (next1 < next2)
		return 0;
	return TREE_UNORDERED;
}
232
233 static int fsck_tree(struct tree *item, struct fsck_options *options)
234 {
235         int retval;
236         int has_null_sha1 = 0;
237         int has_full_path = 0;
238         int has_empty_name = 0;
239         int has_dot = 0;
240         int has_dotdot = 0;
241         int has_dotgit = 0;
242         int has_zero_pad = 0;
243         int has_bad_modes = 0;
244         int has_dup_entries = 0;
245         int not_properly_sorted = 0;
246         struct tree_desc desc;
247         unsigned o_mode;
248         const char *o_name;
249
250         init_tree_desc(&desc, item->buffer, item->size);
251
252         o_mode = 0;
253         o_name = NULL;
254
255         while (desc.size) {
256                 unsigned mode;
257                 const char *name;
258                 const unsigned char *sha1;
259
260                 sha1 = tree_entry_extract(&desc, &name, &mode);
261
262                 has_null_sha1 |= is_null_sha1(sha1);
263                 has_full_path |= !!strchr(name, '/');
264                 has_empty_name |= !*name;
265                 has_dot |= !strcmp(name, ".");
266                 has_dotdot |= !strcmp(name, "..");
267                 has_dotgit |= (!strcmp(name, ".git") ||
268                                is_hfs_dotgit(name) ||
269                                is_ntfs_dotgit(name));
270                 has_zero_pad |= *(char *)desc.buffer == '0';
271                 update_tree_entry(&desc);
272
273                 switch (mode) {
274                 /*
275                  * Standard modes..
276                  */
277                 case S_IFREG | 0755:
278                 case S_IFREG | 0644:
279                 case S_IFLNK:
280                 case S_IFDIR:
281                 case S_IFGITLINK:
282                         break;
283                 /*
284                  * This is nonstandard, but we had a few of these
285                  * early on when we honored the full set of mode
286                  * bits..
287                  */
288                 case S_IFREG | 0664:
289                         if (!options->strict)
290                                 break;
291                 default:
292                         has_bad_modes = 1;
293                 }
294
295                 if (o_name) {
296                         switch (verify_ordered(o_mode, o_name, mode, name)) {
297                         case TREE_UNORDERED:
298                                 not_properly_sorted = 1;
299                                 break;
300                         case TREE_HAS_DUPS:
301                                 has_dup_entries = 1;
302                                 break;
303                         default:
304                                 break;
305                         }
306                 }
307
308                 o_mode = mode;
309                 o_name = name;
310         }
311
312         retval = 0;
313         if (has_null_sha1)
314                 retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
315         if (has_full_path)
316                 retval += report(options, &item->object, FSCK_MSG_FULL_PATHNAME, "contains full pathnames");
317         if (has_empty_name)
318                 retval += report(options, &item->object, FSCK_MSG_EMPTY_NAME, "contains empty pathname");
319         if (has_dot)
320                 retval += report(options, &item->object, FSCK_MSG_HAS_DOT, "contains '.'");
321         if (has_dotdot)
322                 retval += report(options, &item->object, FSCK_MSG_HAS_DOTDOT, "contains '..'");
323         if (has_dotgit)
324                 retval += report(options, &item->object, FSCK_MSG_HAS_DOTGIT, "contains '.git'");
325         if (has_zero_pad)
326                 retval += report(options, &item->object, FSCK_MSG_ZERO_PADDED_FILEMODE, "contains zero-padded file modes");
327         if (has_bad_modes)
328                 retval += report(options, &item->object, FSCK_MSG_BAD_FILEMODE, "contains bad file modes");
329         if (has_dup_entries)
330                 retval += report(options, &item->object, FSCK_MSG_DUPLICATE_ENTRIES, "contains duplicate file entries");
331         if (not_properly_sorted)
332                 retval += report(options, &item->object, FSCK_MSG_TREE_NOT_SORTED, "not properly sorted");
333         return retval;
334 }
335
336 static int require_end_of_header(const void *data, unsigned long size,
337         struct object *obj, struct fsck_options *options)
338 {
339         const char *buffer = (const char *)data;
340         unsigned long i;
341
342         for (i = 0; i < size; i++) {
343                 switch (buffer[i]) {
344                 case '\0':
345                         return report(options, obj,
346                                 FSCK_MSG_NUL_IN_HEADER,
347                                 "unterminated header: NUL at offset %ld", i);
348                 case '\n':
349                         if (i + 1 < size && buffer[i + 1] == '\n')
350                                 return 0;
351                 }
352         }
353
354         return report(options, obj,
355                 FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
356 }
357
358 static int fsck_ident(const char **ident, struct object *obj, struct fsck_options *options)
359 {
360         char *end;
361
362         if (**ident == '<')
363                 return report(options, obj, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
364         *ident += strcspn(*ident, "<>\n");
365         if (**ident == '>')
366                 return report(options, obj, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
367         if (**ident != '<')
368                 return report(options, obj, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
369         if ((*ident)[-1] != ' ')
370                 return report(options, obj, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
371         (*ident)++;
372         *ident += strcspn(*ident, "<>\n");
373         if (**ident != '>')
374                 return report(options, obj, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
375         (*ident)++;
376         if (**ident != ' ')
377                 return report(options, obj, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
378         (*ident)++;
379         if (**ident == '0' && (*ident)[1] != ' ')
380                 return report(options, obj, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
381         if (date_overflows(strtoul(*ident, &end, 10)))
382                 return report(options, obj, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
383         if (end == *ident || *end != ' ')
384                 return report(options, obj, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
385         *ident = end + 1;
386         if ((**ident != '+' && **ident != '-') ||
387             !isdigit((*ident)[1]) ||
388             !isdigit((*ident)[2]) ||
389             !isdigit((*ident)[3]) ||
390             !isdigit((*ident)[4]) ||
391             ((*ident)[5] != '\n'))
392                 return report(options, obj, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
393         (*ident) += 6;
394         return 0;
395 }
396
397 static int fsck_commit_buffer(struct commit *commit, const char *buffer,
398         unsigned long size, struct fsck_options *options)
399 {
400         unsigned char tree_sha1[20], sha1[20];
401         struct commit_graft *graft;
402         unsigned parent_count, parent_line_count = 0;
403         int err;
404
405         if (require_end_of_header(buffer, size, &commit->object, options))
406                 return -1;
407
408         if (!skip_prefix(buffer, "tree ", &buffer))
409                 return report(options, &commit->object, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
410         if (get_sha1_hex(buffer, tree_sha1) || buffer[40] != '\n')
411                 return report(options, &commit->object, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
412         buffer += 41;
413         while (skip_prefix(buffer, "parent ", &buffer)) {
414                 if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n')
415                         return report(options, &commit->object, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
416                 buffer += 41;
417                 parent_line_count++;
418         }
419         graft = lookup_commit_graft(commit->object.sha1);
420         parent_count = commit_list_count(commit->parents);
421         if (graft) {
422                 if (graft->nr_parent == -1 && !parent_count)
423                         ; /* shallow commit */
424                 else if (graft->nr_parent != parent_count)
425                         return report(options, &commit->object, FSCK_MSG_MISSING_GRAFT, "graft objects missing");
426         } else {
427                 if (parent_count != parent_line_count)
428                         return report(options, &commit->object, FSCK_MSG_MISSING_PARENT, "parent objects missing");
429         }
430         if (!skip_prefix(buffer, "author ", &buffer))
431                 return report(options, &commit->object, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
432         err = fsck_ident(&buffer, &commit->object, options);
433         if (err)
434                 return err;
435         if (!skip_prefix(buffer, "committer ", &buffer))
436                 return report(options, &commit->object, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
437         err = fsck_ident(&buffer, &commit->object, options);
438         if (err)
439                 return err;
440         if (!commit->tree)
441                 return report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", sha1_to_hex(tree_sha1));
442
443         return 0;
444 }
445
/*
 * Check a commit object.
 *
 * If the caller supplies `data` (with its `size`), that buffer is checked.
 * Otherwise the buffer is obtained via get_commit_buffer(), which also
 * sets `size`, and it is handed back with unuse_commit_buffer() once the
 * check is done.
 *
 * Returns the result of fsck_commit_buffer().
 */
static int fsck_commit(struct commit *commit, const char *data,
	unsigned long size, struct fsck_options *options)
{
	const char *buffer = data;
	int ret;

	if (!buffer)
		buffer = get_commit_buffer(commit, &size);

	ret = fsck_commit_buffer(commit, buffer, size, options);

	if (!data)
		unuse_commit_buffer(commit, buffer);
	return ret;
}
455
456 static int fsck_tag_buffer(struct tag *tag, const char *data,
457         unsigned long size, struct fsck_options *options)
458 {
459         unsigned char sha1[20];
460         int ret = 0;
461         const char *buffer;
462         char *to_free = NULL, *eol;
463         struct strbuf sb = STRBUF_INIT;
464
465         if (data)
466                 buffer = data;
467         else {
468                 enum object_type type;
469
470                 buffer = to_free =
471                         read_sha1_file(tag->object.sha1, &type, &size);
472                 if (!buffer)
473                         return report(options, &tag->object,
474                                 FSCK_MSG_MISSING_TAG_OBJECT,
475                                 "cannot read tag object");
476
477                 if (type != OBJ_TAG) {
478                         ret = report(options, &tag->object,
479                                 FSCK_MSG_TAG_OBJECT_NOT_TAG,
480                                 "expected tag got %s",
481                             typename(type));
482                         goto done;
483                 }
484         }
485
486         if (require_end_of_header(buffer, size, &tag->object, options))
487                 goto done;
488
489         if (!skip_prefix(buffer, "object ", &buffer)) {
490                 ret = report(options, &tag->object, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
491                 goto done;
492         }
493         if (get_sha1_hex(buffer, sha1) || buffer[40] != '\n') {
494                 ret = report(options, &tag->object, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
495                 goto done;
496         }
497         buffer += 41;
498
499         if (!skip_prefix(buffer, "type ", &buffer)) {
500                 ret = report(options, &tag->object, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
501                 goto done;
502         }
503         eol = strchr(buffer, '\n');
504         if (!eol) {
505                 ret = report(options, &tag->object, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
506                 goto done;
507         }
508         if (type_from_string_gently(buffer, eol - buffer, 1) < 0)
509                 ret = report(options, &tag->object, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
510         if (ret)
511                 goto done;
512         buffer = eol + 1;
513
514         if (!skip_prefix(buffer, "tag ", &buffer)) {
515                 ret = report(options, &tag->object, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
516                 goto done;
517         }
518         eol = strchr(buffer, '\n');
519         if (!eol) {
520                 ret = report(options, &tag->object, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
521                 goto done;
522         }
523         strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
524         if (check_refname_format(sb.buf, 0))
525                 report(options, &tag->object, FSCK_MSG_BAD_TAG_NAME,
526                            "invalid 'tag' name: %.*s",
527                            (int)(eol - buffer), buffer);
528         buffer = eol + 1;
529
530         if (!skip_prefix(buffer, "tagger ", &buffer))
531                 /* early tags do not contain 'tagger' lines; warn only */
532                 report(options, &tag->object, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
533         else
534                 ret = fsck_ident(&buffer, &tag->object, options);
535
536 done:
537         strbuf_release(&sb);
538         free(to_free);
539         return ret;
540 }
541
542 static int fsck_tag(struct tag *tag, const char *data,
543         unsigned long size, struct fsck_options *options)
544 {
545         struct object *tagged = tag->tagged;
546
547         if (!tagged)
548                 return report(options, &tag->object, FSCK_MSG_BAD_TAG_OBJECT, "could not load tagged object");
549
550         return fsck_tag_buffer(tag, data, size, options);
551 }
552
553 int fsck_object(struct object *obj, void *data, unsigned long size,
554         struct fsck_options *options)
555 {
556         if (!obj)
557                 return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
558
559         if (obj->type == OBJ_BLOB)
560                 return 0;
561         if (obj->type == OBJ_TREE)
562                 return fsck_tree((struct tree *) obj, options);
563         if (obj->type == OBJ_COMMIT)
564                 return fsck_commit((struct commit *) obj, (const char *) data,
565                         size, options);
566         if (obj->type == OBJ_TAG)
567                 return fsck_tag((struct tag *) obj, (const char *) data,
568                         size, options);
569
570         return report(options, obj, FSCK_MSG_UNKNOWN_TYPE, "unknown type '%d' (internal fsck error)",
571                           obj->type);
572 }
573
574 int fsck_error_function(struct object *obj, int msg_type, const char *message)
575 {
576         error("object %s: %s", sha1_to_hex(obj->sha1), message);
577         return 1;
578 }