[PATCH] KVM: fix lockup on 32-bit intel hosts with nx disabled in the bios
[linux-2.6] / fs / jffs2 / readinode.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30         struct rb_node **p = &list->rb_node;
31         struct rb_node * parent = NULL;
32         struct jffs2_tmp_dnode_info *this;
33
34         while (*p) {
35                 parent = *p;
36                 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37
38                 /* There may actually be a collision here, but it doesn't
39                    actually matter. As long as the two nodes with the same
40                    version are together, it's all fine. */
41                 if (tn->version > this->version)
42                         p = &(*p)->rb_left;
43                 else
44                         p = &(*p)->rb_right;
45         }
46
47         rb_link_node(&tn->rb, parent, p);
48         rb_insert_color(&tn->rb, list);
49 }
50
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53         struct rb_node *this;
54         struct jffs2_tmp_dnode_info *tn;
55
56         this = list->rb_node;
57
58         /* Now at bottom of tree */
59         while (this) {
60                 if (this->rb_left)
61                         this = this->rb_left;
62                 else if (this->rb_right)
63                         this = this->rb_right;
64                 else {
65                         tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66                         jffs2_free_full_dnode(tn->fn);
67                         jffs2_free_tmp_dnode_info(tn);
68
69                         this = rb_parent(this);
70                         if (!this)
71                                 break;
72
73                         if (this->rb_left == &tn->rb)
74                                 this->rb_left = NULL;
75                         else if (this->rb_right == &tn->rb)
76                                 this->rb_right = NULL;
77                         else BUG();
78                 }
79         }
80         list->rb_node = NULL;
81 }
82
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85         struct jffs2_full_dirent *next;
86
87         while (fd) {
88                 next = fd->next;
89                 jffs2_free_full_dirent(fd);
90                 fd = next;
91         }
92 }
93
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97         while (ref && ref->next_in_ino) {
98                 if (!ref_obsolete(ref))
99                         return ref;
100                 dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101                 ref = ref->next_in_ino;
102         }
103         return NULL;
104 }
105
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  *          1 if the node should be marked obsolete;
112  *          negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115                                 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116                                 uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118         struct jffs2_full_dirent *fd;
119         uint32_t crc;
120
121         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
122         BUG_ON(ref_obsolete(ref));
123
124         crc = crc32(0, rd, sizeof(*rd) - 8);
125         if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126                 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127                              ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128                 return 1;
129         }
130
131         /* If we've never checked the CRCs on this node, check them now */
132         if (ref_flags(ref) == REF_UNCHECKED) {
133                 struct jffs2_eraseblock *jeb;
134                 int len;
135
136                 /* Sanity check */
137                 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138                         JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139                                     ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140                         return 1;
141                 }
142
143                 jeb = &c->blocks[ref->flash_offset / c->sector_size];
144                 len = ref_totlen(c, jeb, ref);
145
146                 spin_lock(&c->erase_completion_lock);
147                 jeb->used_size += len;
148                 jeb->unchecked_size -= len;
149                 c->used_size += len;
150                 c->unchecked_size -= len;
151                 ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
152                 spin_unlock(&c->erase_completion_lock);
153         }
154
155         fd = jffs2_alloc_full_dirent(rd->nsize + 1);
156         if (unlikely(!fd))
157                 return -ENOMEM;
158
159         fd->raw = ref;
160         fd->version = je32_to_cpu(rd->version);
161         fd->ino = je32_to_cpu(rd->ino);
162         fd->type = rd->type;
163
164         /* Pick out the mctime of the latest dirent */
165         if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
166                 *mctime_ver = fd->version;
167                 *latest_mctime = je32_to_cpu(rd->mctime);
168         }
169
170         /*
171          * Copy as much of the name as possible from the raw
172          * dirent we've already read from the flash.
173          */
174         if (read > sizeof(*rd))
175                 memcpy(&fd->name[0], &rd->name[0],
176                        min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
177
178         /* Do we need to copy any more of the name directly from the flash? */
179         if (rd->nsize + sizeof(*rd) > read) {
180                 /* FIXME: point() */
181                 int err;
182                 int already = read - sizeof(*rd);
183
184                 err = jffs2_flash_read(c, (ref_offset(ref)) + read,
185                                 rd->nsize - already, &read, &fd->name[already]);
186                 if (unlikely(read != rd->nsize - already) && likely(!err))
187                         return -EIO;
188
189                 if (unlikely(err)) {
190                         JFFS2_ERROR("read remainder of name: error %d\n", err);
191                         jffs2_free_full_dirent(fd);
192                         return -EIO;
193                 }
194         }
195
196         fd->nhash = full_name_hash(fd->name, rd->nsize);
197         fd->next = NULL;
198         fd->name[rd->nsize] = '\0';
199
200         /*
201          * Wheee. We now have a complete jffs2_full_dirent structure, with
202          * the name in it and everything. Link it into the list
203          */
204         jffs2_add_fd_to_list(c, fd, fdp);
205
206         return 0;
207 }
208
209 /*
210  * Helper function for jffs2_get_inode_nodes().
211  * It is called every time an inode node is found.
212  *
213  * Returns: 0 on succes;
214  *          1 if the node should be marked obsolete;
215  *          negative error code on failure.
216  */
217 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218                              struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
219                              uint32_t *latest_mctime, uint32_t *mctime_ver)
220 {
221         struct jffs2_tmp_dnode_info *tn;
222         uint32_t len, csize;
223         int ret = 1;
224         uint32_t crc;
225
226         /* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
227         BUG_ON(ref_obsolete(ref));
228
229         crc = crc32(0, rd, sizeof(*rd) - 8);
230         if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231                 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232                              ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233                 return 1;
234         }
235
236         tn = jffs2_alloc_tmp_dnode_info();
237         if (!tn) {
238                 JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
239                 return -ENOMEM;
240         }
241
242         tn->partial_crc = 0;
243         csize = je32_to_cpu(rd->csize);
244
245         /* If we've never checked the CRCs on this node, check them now */
246         if (ref_flags(ref) == REF_UNCHECKED) {
247
248                 /* Sanity checks */
249                 if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
250                     unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
251                                 JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
252                                 jffs2_dbg_dump_node(c, ref_offset(ref));
253                         goto free_out;
254                 }
255
256                 if (jffs2_is_writebuffered(c) && csize != 0) {
257                         /* At this point we are supposed to check the data CRC
258                          * of our unchecked node. But thus far, we do not
259                          * know whether the node is valid or obsolete. To
260                          * figure this out, we need to walk all the nodes of
261                          * the inode and build the inode fragtree. We don't
262                          * want to spend time checking data of nodes which may
263                          * later be found to be obsolete. So we put off the full
264                          * data CRC checking until we have read all the inode
265                          * nodes and have started building the fragtree.
266                          *
267                          * The fragtree is being built starting with nodes
268                          * having the highest version number, so we'll be able
269                          * to detect whether a node is valid (i.e., it is not
270                          * overlapped by a node with higher version) or not.
271                          * And we'll be able to check only those nodes, which
272                          * are not obsolete.
273                          *
274                          * Of course, this optimization only makes sense in case
275                          * of NAND flashes (or other flashes whith
276                          * !jffs2_can_mark_obsolete()), since on NOR flashes
277                          * nodes are marked obsolete physically.
278                          *
279                          * Since NAND flashes (or other flashes with
280                          * jffs2_is_writebuffered(c)) are anyway read by
281                          * fractions of c->wbuf_pagesize, and we have just read
282                          * the node header, it is likely that the starting part
283                          * of the node data is also read when we read the
284                          * header. So we don't mind to check the CRC of the
285                          * starting part of the data of the node now, and check
286                          * the second part later (in jffs2_check_node_data()).
287                          * Of course, we will not need to re-read and re-check
288                          * the NAND page which we have just read. This is why we
289                          * read the whole NAND page at jffs2_get_inode_nodes(),
290                          * while we needed only the node header.
291                          */
292                         unsigned char *buf;
293
294                         /* 'buf' will point to the start of data */
295                         buf = (unsigned char *)rd + sizeof(*rd);
296                         /* len will be the read data length */
297                         len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
298                         tn->partial_crc = crc32(0, buf, len);
299
300                         dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
301
302                         /* If we actually calculated the whole data CRC
303                          * and it is wrong, drop the node. */
304                         if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
305                                 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
306                                         ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
307                                 goto free_out;
308                         }
309
310                 } else if (csize == 0) {
311                         /*
312                          * We checked the header CRC. If the node has no data, adjust
313                          * the space accounting now. For other nodes this will be done
314                          * later either when the node is marked obsolete or when its
315                          * data is checked.
316                          */
317                         struct jffs2_eraseblock *jeb;
318
319                         dbg_readinode("the node has no data.\n");
320                         jeb = &c->blocks[ref->flash_offset / c->sector_size];
321                         len = ref_totlen(c, jeb, ref);
322
323                         spin_lock(&c->erase_completion_lock);
324                         jeb->used_size += len;
325                         jeb->unchecked_size -= len;
326                         c->used_size += len;
327                         c->unchecked_size -= len;
328                         ref->flash_offset = ref_offset(ref) | REF_NORMAL;
329                         spin_unlock(&c->erase_completion_lock);
330                 }
331         }
332
333         tn->fn = jffs2_alloc_full_dnode();
334         if (!tn->fn) {
335                 JFFS2_ERROR("alloc fn failed\n");
336                 ret = -ENOMEM;
337                 goto free_out;
338         }
339
340         tn->version = je32_to_cpu(rd->version);
341         tn->fn->ofs = je32_to_cpu(rd->offset);
342         tn->data_crc = je32_to_cpu(rd->data_crc);
343         tn->csize = csize;
344         tn->fn->raw = ref;
345
346         /* There was a bug where we wrote hole nodes out with
347            csize/dsize swapped. Deal with it */
348         if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
349                 tn->fn->size = csize;
350         else // normal case...
351                 tn->fn->size = je32_to_cpu(rd->dsize);
352
353         dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354                   ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355
356         jffs2_add_tn_to_tree(tn, tnp);
357
358         return 0;
359
360 free_out:
361         jffs2_free_tmp_dnode_info(tn);
362         return ret;
363 }
364
365 /*
366  * Helper function for jffs2_get_inode_nodes().
367  * It is called every time an unknown node is found.
368  *
369  * Returns: 0 on success;
370  *          1 if the node should be marked obsolete;
371  *          negative error code on failure.
372  */
373 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
374 {
375         /* We don't mark unknown nodes as REF_UNCHECKED */
376         BUG_ON(ref_flags(ref) == REF_UNCHECKED);
377
378         un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
379
380         switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
381
382         case JFFS2_FEATURE_INCOMPAT:
383                 JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
384                             je16_to_cpu(un->nodetype), ref_offset(ref));
385                 /* EEP */
386                 BUG();
387                 break;
388
389         case JFFS2_FEATURE_ROCOMPAT:
390                 JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
391                             je16_to_cpu(un->nodetype), ref_offset(ref));
392                 BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
393                 break;
394
395         case JFFS2_FEATURE_RWCOMPAT_COPY:
396                 JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
397                              je16_to_cpu(un->nodetype), ref_offset(ref));
398                 break;
399
400         case JFFS2_FEATURE_RWCOMPAT_DELETE:
401                 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
402                              je16_to_cpu(un->nodetype), ref_offset(ref));
403                 return 1;
404         }
405
406         return 0;
407 }
408
409 /*
410  * Helper function for jffs2_get_inode_nodes().
411  * The function detects whether more data should be read and reads it if yes.
412  *
413  * Returns: 0 on succes;
414  *          negative error code on failure.
415  */
416 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
417                      int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
418 {
419         int right_len, err, len;
420         size_t retlen;
421         uint32_t offs;
422
423         if (jffs2_is_writebuffered(c)) {
424                 right_len = c->wbuf_pagesize - (bufstart - buf);
425                 if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
426                         right_len += c->wbuf_pagesize;
427         } else
428                 right_len = right_size;
429
430         if (*rdlen == right_len)
431                 return 0;
432
433         /* We need to read more data */
434         offs = ref_offset(ref) + *rdlen;
435         if (jffs2_is_writebuffered(c)) {
436                 bufstart = buf + c->wbuf_pagesize;
437                 len = c->wbuf_pagesize;
438         } else {
439                 bufstart = buf + *rdlen;
440                 len = right_size - *rdlen;
441         }
442
443         dbg_readinode("read more %d bytes\n", len);
444
445         err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
446         if (err) {
447                 JFFS2_ERROR("can not read %d bytes from 0x%08x, "
448                         "error code: %d.\n", len, offs, err);
449                 return err;
450         }
451
452         if (retlen < len) {
453                 JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
454                                 offs, retlen, len);
455                 return -EIO;
456         }
457
458         *rdlen = right_len;
459
460         return 0;
461 }
462
463 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
464    with this ino, returning the former in order of version */
465 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
466                                  struct rb_root *tnp, struct jffs2_full_dirent **fdp,
467                                  uint32_t *highest_version, uint32_t *latest_mctime,
468                                  uint32_t *mctime_ver)
469 {
470         struct jffs2_raw_node_ref *ref, *valid_ref;
471         struct rb_root ret_tn = RB_ROOT;
472         struct jffs2_full_dirent *ret_fd = NULL;
473         unsigned char *buf = NULL;
474         union jffs2_node_union *node;
475         size_t retlen;
476         int len, err;
477
478         *mctime_ver = 0;
479
480         dbg_readinode("ino #%u\n", f->inocache->ino);
481
482         if (jffs2_is_writebuffered(c)) {
483                 /*
484                  * If we have the write buffer, we assume the minimal I/O unit
485                  * is c->wbuf_pagesize. We implement some optimizations which in
486                  * this case and we need a temporary buffer of size =
487                  * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
488                  * Basically, we want to read not only the node header, but the
489                  * whole wbuf (NAND page in case of NAND) or 2, if the node
490                  * header overlaps the border between the 2 wbufs.
491                  */
492                 len = 2*c->wbuf_pagesize;
493         } else {
494                 /*
495                  * When there is no write buffer, the size of the temporary
496                  * buffer is the size of the larges node header.
497                  */
498                 len = sizeof(union jffs2_node_union);
499         }
500
501         /* FIXME: in case of NOR and available ->point() this
502          * needs to be fixed. */
503         buf = kmalloc(len, GFP_KERNEL);
504         if (!buf)
505                 return -ENOMEM;
506
507         spin_lock(&c->erase_completion_lock);
508         valid_ref = jffs2_first_valid_node(f->inocache->nodes);
509         if (!valid_ref && f->inocache->ino != 1)
510                 JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
511         while (valid_ref) {
512                 unsigned char *bufstart;
513
514                 /* We can hold a pointer to a non-obsolete node without the spinlock,
515                    but _obsolete_ nodes may disappear at any time, if the block
516                    they're in gets erased. So if we mark 'ref' obsolete while we're
517                    not holding the lock, it can go away immediately. For that reason,
518                    we find the next valid node first, before processing 'ref'.
519                 */
520                 ref = valid_ref;
521                 valid_ref = jffs2_first_valid_node(ref->next_in_ino);
522                 spin_unlock(&c->erase_completion_lock);
523
524                 cond_resched();
525
526                 /*
527                  * At this point we don't know the type of the node we're going
528                  * to read, so we do not know the size of its header. In order
529                  * to minimize the amount of flash IO we assume the node has
530                  * size = JFFS2_MIN_NODE_HEADER.
531                  */
532                 if (jffs2_is_writebuffered(c)) {
533                         /*
534                          * We treat 'buf' as 2 adjacent wbufs. We want to
535                          * adjust bufstart such as it points to the
536                          * beginning of the node within this wbuf.
537                          */
538                         bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
539                         /* We will read either one wbuf or 2 wbufs. */
540                         len = c->wbuf_pagesize - (bufstart - buf);
541                         if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
542                                 /* The header spans the border of the first wbuf */
543                                 len += c->wbuf_pagesize;
544                         }
545                 } else {
546                         bufstart = buf;
547                         len = JFFS2_MIN_NODE_HEADER;
548                 }
549
550                 dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
551
552                 /* FIXME: point() */
553                 err = jffs2_flash_read(c, ref_offset(ref), len,
554                                        &retlen, bufstart);
555                 if (err) {
556                         JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
557                         goto free_out;
558                 }
559
560                 if (retlen < len) {
561                         JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
562                         err = -EIO;
563                         goto free_out;
564                 }
565
566                 node = (union jffs2_node_union *)bufstart;
567
568                 /* No need to mask in the valid bit; it shouldn't be invalid */
569                 if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
570                         JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
571                                      ref_offset(ref), je16_to_cpu(node->u.magic),
572                                      je16_to_cpu(node->u.nodetype),
573                                      je32_to_cpu(node->u.totlen),
574                                      je32_to_cpu(node->u.hdr_crc));
575                         jffs2_dbg_dump_node(c, ref_offset(ref));
576                         jffs2_mark_node_obsolete(c, ref);
577                         goto cont;
578                 }
579
580                 switch (je16_to_cpu(node->u.nodetype)) {
581
582                 case JFFS2_NODETYPE_DIRENT:
583
584                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
585                                 err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
586                                 if (unlikely(err))
587                                         goto free_out;
588                         }
589
590                         err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
591                         if (err == 1) {
592                                 jffs2_mark_node_obsolete(c, ref);
593                                 break;
594                         } else if (unlikely(err))
595                                 goto free_out;
596
597                         if (je32_to_cpu(node->d.version) > *highest_version)
598                                 *highest_version = je32_to_cpu(node->d.version);
599
600                         break;
601
602                 case JFFS2_NODETYPE_INODE:
603
604                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
605                                 err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
606                                 if (unlikely(err))
607                                         goto free_out;
608                         }
609
610                         err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
611                         if (err == 1) {
612                                 jffs2_mark_node_obsolete(c, ref);
613                                 break;
614                         } else if (unlikely(err))
615                                 goto free_out;
616
617                         if (je32_to_cpu(node->i.version) > *highest_version)
618                                 *highest_version = je32_to_cpu(node->i.version);
619
620                         break;
621
622                 default:
623                         if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
624                                 err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
625                                 if (unlikely(err))
626                                         goto free_out;
627                         }
628
629                         err = read_unknown(c, ref, &node->u);
630                         if (err == 1) {
631                                 jffs2_mark_node_obsolete(c, ref);
632                                 break;
633                         } else if (unlikely(err))
634                                 goto free_out;
635
636                 }
637         cont:
638                 spin_lock(&c->erase_completion_lock);
639         }
640
641         spin_unlock(&c->erase_completion_lock);
642         *tnp = ret_tn;
643         *fdp = ret_fd;
644         kfree(buf);
645
646         dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
647                         f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
648         return 0;
649
650  free_out:
651         jffs2_free_tmp_dnode_info_list(&ret_tn);
652         jffs2_free_full_dirent_list(ret_fd);
653         kfree(buf);
654         return err;
655 }
656
657 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
658                                         struct jffs2_inode_info *f,
659                                         struct jffs2_raw_inode *latest_node)
660 {
661         struct jffs2_tmp_dnode_info *tn;
662         struct rb_root tn_list;
663         struct rb_node *rb, *repl_rb;
664         struct jffs2_full_dirent *fd_list;
665         struct jffs2_full_dnode *fn, *first_fn = NULL;
666         uint32_t crc;
667         uint32_t latest_mctime, mctime_ver;
668         size_t retlen;
669         int ret;
670
671         dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
672
673         /* Grab all nodes relevant to this ino */
674         ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
675
676         if (ret) {
677                 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
678                 if (f->inocache->state == INO_STATE_READING)
679                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
680                 return ret;
681         }
682         f->dents = fd_list;
683
684         rb = rb_first(&tn_list);
685
686         while (rb) {
687                 cond_resched();
688                 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
689                 fn = tn->fn;
690                 ret = 1;
691                 dbg_readinode("consider node ver %u, phys offset "
692                         "%#08x(%d), range %u-%u.\n", tn->version,
693                         ref_offset(fn->raw), ref_flags(fn->raw),
694                         fn->ofs, fn->ofs + fn->size);
695
696                 if (fn->size) {
697                         ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
698                         /* TODO: the error code isn't checked, check it */
699                         jffs2_dbg_fragtree_paranoia_check_nolock(f);
700                         BUG_ON(ret < 0);
701                         if (!first_fn && ret == 0)
702                                 first_fn = fn;
703                 } else if (!first_fn) {
704                         first_fn = fn;
705                         f->metadata = fn;
706                         ret = 0; /* Prevent freeing the metadata update node */
707                 } else
708                         jffs2_mark_node_obsolete(c, fn->raw);
709
710                 BUG_ON(rb->rb_left);
711                 if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
712                         /* We were then left-hand child of our parent. We need
713                          * to move our own right-hand child into our place. */
714                         repl_rb = rb->rb_right;
715                         if (repl_rb)
716                                 rb_set_parent(repl_rb, rb_parent(rb));
717                 } else
718                         repl_rb = NULL;
719
720                 rb = rb_next(rb);
721
722                 /* Remove the spent tn from the tree; don't bother rebalancing
723                  * but put our right-hand child in our own place. */
724                 if (rb_parent(&tn->rb)) {
725                         if (rb_parent(&tn->rb)->rb_left == &tn->rb)
726                                 rb_parent(&tn->rb)->rb_left = repl_rb;
727                         else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
728                                 rb_parent(&tn->rb)->rb_right = repl_rb;
729                         else BUG();
730                 } else if (tn->rb.rb_right)
731                         rb_set_parent(tn->rb.rb_right, NULL);
732
733                 jffs2_free_tmp_dnode_info(tn);
734                 if (ret) {
735                         dbg_readinode("delete dnode %u-%u.\n",
736                                 fn->ofs, fn->ofs + fn->size);
737                         jffs2_free_full_dnode(fn);
738                 }
739         }
740         jffs2_dbg_fragtree_paranoia_check_nolock(f);
741
742         BUG_ON(first_fn && ref_obsolete(first_fn->raw));
743
744         fn = first_fn;
745         if (unlikely(!first_fn)) {
746                 /* No data nodes for this inode. */
747                 if (f->inocache->ino != 1) {
748                         JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
749                         if (!fd_list) {
750                                 if (f->inocache->state == INO_STATE_READING)
751                                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
752                                 return -EIO;
753                         }
754                         JFFS2_NOTICE("but it has children so we fake some modes for it\n");
755                 }
756                 latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
757                 latest_node->version = cpu_to_je32(0);
758                 latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
759                 latest_node->isize = cpu_to_je32(0);
760                 latest_node->gid = cpu_to_je16(0);
761                 latest_node->uid = cpu_to_je16(0);
762                 if (f->inocache->state == INO_STATE_READING)
763                         jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
764                 return 0;
765         }
766
767         ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
768         if (ret || retlen != sizeof(*latest_node)) {
769                 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
770                         ret, retlen, sizeof(*latest_node));
771                 /* FIXME: If this fails, there seems to be a memory leak. Find it. */
772                 up(&f->sem);
773                 jffs2_do_clear_inode(c, f);
774                 return ret?ret:-EIO;
775         }
776
777         crc = crc32(0, latest_node, sizeof(*latest_node)-8);
778         if (crc != je32_to_cpu(latest_node->node_crc)) {
779                 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
780                         f->inocache->ino, ref_offset(fn->raw));
781                 up(&f->sem);
782                 jffs2_do_clear_inode(c, f);
783                 return -EIO;
784         }
785
786         switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
787         case S_IFDIR:
788                 if (mctime_ver > je32_to_cpu(latest_node->version)) {
789                         /* The times in the latest_node are actually older than
790                            mctime in the latest dirent. Cheat. */
791                         latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
792                 }
793                 break;
794
795
796         case S_IFREG:
797                 /* If it was a regular file, truncate it to the latest node's isize */
798                 jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
799                 break;
800
801         case S_IFLNK:
802                 /* Hack to work around broken isize in old symlink code.
803                    Remove this when dwmw2 comes to his senses and stops
804                    symlinks from being an entirely gratuitous special
805                    case. */
806                 if (!je32_to_cpu(latest_node->isize))
807                         latest_node->isize = latest_node->dsize;
808
809                 if (f->inocache->state != INO_STATE_CHECKING) {
810                         /* Symlink's inode data is the target path. Read it and
811                          * keep in RAM to facilitate quick follow symlink
812                          * operation. */
813                         f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
814                         if (!f->target) {
815                                 JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
816                                 up(&f->sem);
817                                 jffs2_do_clear_inode(c, f);
818                                 return -ENOMEM;
819                         }
820
821                         ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
822                                                 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
823
824                         if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
825                                 if (retlen != je32_to_cpu(latest_node->csize))
826                                         ret = -EIO;
827                                 kfree(f->target);
828                                 f->target = NULL;
829                                 up(&f->sem);
830                                 jffs2_do_clear_inode(c, f);
831                                 return -ret;
832                         }
833
834                         f->target[je32_to_cpu(latest_node->csize)] = '\0';
835                         dbg_readinode("symlink's target '%s' cached\n", f->target);
836                 }
837
838                 /* fall through... */
839
840         case S_IFBLK:
841         case S_IFCHR:
842                 /* Certain inode types should have only one data node, and it's
843                    kept as the metadata node */
844                 if (f->metadata) {
845                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
846                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
847                         up(&f->sem);
848                         jffs2_do_clear_inode(c, f);
849                         return -EIO;
850                 }
851                 if (!frag_first(&f->fragtree)) {
852                         JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
853                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
854                         up(&f->sem);
855                         jffs2_do_clear_inode(c, f);
856                         return -EIO;
857                 }
858                 /* ASSERT: f->fraglist != NULL */
859                 if (frag_next(frag_first(&f->fragtree))) {
860                         JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
861                                f->inocache->ino, jemode_to_cpu(latest_node->mode));
862                         /* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
863                         up(&f->sem);
864                         jffs2_do_clear_inode(c, f);
865                         return -EIO;
866                 }
867                 /* OK. We're happy */
868                 f->metadata = frag_first(&f->fragtree)->node;
869                 jffs2_free_node_frag(frag_first(&f->fragtree));
870                 f->fragtree = RB_ROOT;
871                 break;
872         }
873         if (f->inocache->state == INO_STATE_READING)
874                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
875
876         return 0;
877 }
878
879 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
880 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
881                         uint32_t ino, struct jffs2_raw_inode *latest_node)
882 {
883         dbg_readinode("read inode #%u\n", ino);
884
885  retry_inocache:
886         spin_lock(&c->inocache_lock);
887         f->inocache = jffs2_get_ino_cache(c, ino);
888
889         if (f->inocache) {
890                 /* Check its state. We may need to wait before we can use it */
891                 switch(f->inocache->state) {
892                 case INO_STATE_UNCHECKED:
893                 case INO_STATE_CHECKEDABSENT:
894                         f->inocache->state = INO_STATE_READING;
895                         break;
896
897                 case INO_STATE_CHECKING:
898                 case INO_STATE_GC:
899                         /* If it's in either of these states, we need
900                            to wait for whoever's got it to finish and
901                            put it back. */
902                         dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
903                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
904                         goto retry_inocache;
905
906                 case INO_STATE_READING:
907                 case INO_STATE_PRESENT:
908                         /* Eep. This should never happen. It can
909                         happen if Linux calls read_inode() again
910                         before clear_inode() has finished though. */
911                         JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
912                         /* Fail. That's probably better than allowing it to succeed */
913                         f->inocache = NULL;
914                         break;
915
916                 default:
917                         BUG();
918                 }
919         }
920         spin_unlock(&c->inocache_lock);
921
922         if (!f->inocache && ino == 1) {
923                 /* Special case - no root inode on medium */
924                 f->inocache = jffs2_alloc_inode_cache();
925                 if (!f->inocache) {
926                         JFFS2_ERROR("cannot allocate inocache for root inode\n");
927                         return -ENOMEM;
928                 }
929                 dbg_readinode("creating inocache for root inode\n");
930                 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
931                 f->inocache->ino = f->inocache->nlink = 1;
932                 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
933                 f->inocache->state = INO_STATE_READING;
934                 jffs2_add_ino_cache(c, f->inocache);
935         }
936         if (!f->inocache) {
937                 JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
938                 return -ENOENT;
939         }
940
941         return jffs2_do_read_inode_internal(c, f, latest_node);
942 }
943
944 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
945 {
946         struct jffs2_raw_inode n;
947         struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL);
948         int ret;
949
950         if (!f)
951                 return -ENOMEM;
952
953         init_MUTEX_LOCKED(&f->sem);
954         f->inocache = ic;
955
956         ret = jffs2_do_read_inode_internal(c, f, &n);
957         if (!ret) {
958                 up(&f->sem);
959                 jffs2_do_clear_inode(c, f);
960         }
961         kfree (f);
962         return ret;
963 }
964
965 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
966 {
967         struct jffs2_full_dirent *fd, *fds;
968         int deleted;
969
970         jffs2_clear_acl(f);
971         jffs2_xattr_delete_inode(c, f->inocache);
972         down(&f->sem);
973         deleted = f->inocache && !f->inocache->nlink;
974
975         if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
976                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
977
978         if (f->metadata) {
979                 if (deleted)
980                         jffs2_mark_node_obsolete(c, f->metadata->raw);
981                 jffs2_free_full_dnode(f->metadata);
982         }
983
984         jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
985
986         if (f->target) {
987                 kfree(f->target);
988                 f->target = NULL;
989         }
990
991         fds = f->dents;
992         while(fds) {
993                 fd = fds;
994                 fds = fd->next;
995                 jffs2_free_full_dirent(fd);
996         }
997
998         if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
999                 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
1000                 if (f->inocache->nodes == (void *)f->inocache)
1001                         jffs2_del_ino_cache(c, f->inocache);
1002         }
1003
1004         up(&f->sem);
1005 }