[JFFS2][XATTR] Handling the duplicate JFFS2_NODETYPE_XATTR node cases.
[linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
/*
 * Forward declarations of the file-local helpers that
 * jffs2_garbage_collect_pass() and jffs2_garbage_collect_live() dispatch to,
 * so that they can be called before their definitions appear.
 */
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
25                                           struct jffs2_inode_cache *ic,
26                                           struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35                                       uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38                                        uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held.
    *
    * Choose the eraseblock to garbage-collect next, unlink it from the list
    * it was found on, record it in c->gcblock and start its gc_node cursor at
    * the block's first_node.
    *
    * Returns the chosen block, or NULL when every candidate list is empty.
    * The erase_completion_lock is dropped and retaken around the write-buffer
    * flush, so list state may change across that call; the selection is then
    * restarted from the top.
    */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
           /* n is in 0..127; it is sampled once and compared against the
              thresholds below to weight the choice between the lists. It is
              not re-sampled on 'goto again'. */
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
           /* bad_used_list takes priority, but only while nr_free_blocks is
              above resv_blocks_gcbad. */
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased.
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
                   /* From here on the value of n is ignored: these are the
                      fallbacks for when the preferred list was empty. */
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks waiting for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
                   /* The flush is done without the spinlock held; retake it
                      afterwards and redo the whole selection. */
86                 spin_unlock(&c->erase_completion_lock);
87                 jffs2_flush_wbuf_pad(c);
88                 spin_lock(&c->erase_completion_lock);
89                 goto again;
90         } else {
91                 /* Eep. All were empty */
92                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93                 return NULL;
94         }
95
           /* Take the first block off the chosen list and make it the
              current GC block. */
96         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97         list_del(&ret->list);
98         c->gcblock = ret;
99         ret->gc_node = ret->first_node;
100         if (!ret->gc_node) {
101                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102                 BUG();
103         }
104
105         /* Have we accidentally picked a clean block with wasted space ? */
106         if (ret->wasted_size) {
107                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
                    /* Move the block's wasted bytes into its dirty count and
                       adjust the filesystem-wide totals to match. */
108                 ret->dirty_size += ret->wasted_size;
109                 c->wasted_size -= ret->wasted_size;
110                 c->dirty_size += ret->wasted_size;
111                 ret->wasted_size = 0;
112         }
113
114         return ret;
115 }
116
117 /* jffs2_garbage_collect_pass
118  * Make a single attempt to progress GC. Move one node, and possibly
119  * start erasing one eraseblock.
     *
     * The function takes c->alloc_sem on entry and drops it again on every
     * path that returns.
     *
     * While c->unchecked_size is non-zero no node is moved: the call instead
     * runs jffs2_do_crccheck_inode() on the next inode that is still in
     * INO_STATE_UNCHECKED (tracked by c->checked_ino) and returns that result.
     *
     * Otherwise one non-obsolete node of the current GC block (c->gcblock,
     * chosen by jffs2_find_gc_block() if unset) is handled, and the block is
     * queued on erase_pending_list once its used_size has dropped to zero.
     *
     * Returns:
     *   -EINTR  if interrupted while waiting for alloc_sem;
     *   -EIO    if no eraseblock could be found to collect from;
     *   0       when it only waited for an inode in INO_STATE_READING, or when
     *           jffs2_gc_fetch_inode() returned NULL;
     *   otherwise the result of the check/collect helper that was invoked.
120  */
121 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122 {
123         struct jffs2_inode_info *f;
124         struct jffs2_inode_cache *ic;
125         struct jffs2_eraseblock *jeb;
126         struct jffs2_raw_node_ref *raw;
127         int ret = 0, inum, nlink;
128         int xattr = 0;
129
130         if (down_interruptible(&c->alloc_sem))
131                 return -EINTR;
132
            /* This loop is left via 'break' with erase_completion_lock held
               once there is no unchecked space; every other way out of it is
               a return (or BUG). 'continue' just moves on to the next inode
               number. */
133         for (;;) {
134                 spin_lock(&c->erase_completion_lock);
135                 if (!c->unchecked_size)
136                         break;
137
138                 /* We can't start doing GC yet. We haven't finished checking
139                    the node CRCs etc. Do it now. */
140
141                 /* checked_ino is protected by the alloc_sem */
142                 if (c->checked_ino > c->highest_ino && xattr) {
143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144                                c->unchecked_size);
145                         jffs2_dbg_dump_block_lists_nolock(c);
146                         spin_unlock(&c->erase_completion_lock);
147                         BUG();
148                 }
149
150                 spin_unlock(&c->erase_completion_lock);
151
                    /* jffs2_verify_xattr() is called on each iteration until it
                       has returned non-zero; the BUG check above is only armed
                       after that. */
152                 if (!xattr)
153                         xattr = jffs2_verify_xattr(c);
154
155                 spin_lock(&c->inocache_lock);
156
157                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
158
159                 if (!ic) {
160                         spin_unlock(&c->inocache_lock);
161                         continue;
162                 }
163
164                 if (!ic->nlink) {
165                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166                                   ic->ino));
167                         spin_unlock(&c->inocache_lock);
168                         continue;
169                 }
170                 switch(ic->state) {
171                 case INO_STATE_CHECKEDABSENT:
172                 case INO_STATE_PRESENT:
173                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174                         spin_unlock(&c->inocache_lock);
175                         continue;
176
177                 case INO_STATE_GC:
178                 case INO_STATE_CHECKING:
179                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180                         spin_unlock(&c->inocache_lock);
181                         BUG();
182
183                 case INO_STATE_READING:
184                         /* We need to wait for it to finish, lest we move on
185                            and trigger the BUG() above while we haven't yet
186                            finished checking all its nodes */
187                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
188                         /* We need to come back again for the _same_ inode. We've
189                          made no progress in this case, but that should be OK */
190                         c->checked_ino--;
191
192                         up(&c->alloc_sem);
193                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194                         return 0;
195
196                 default:
197                         BUG();
198
199                 case INO_STATE_UNCHECKED:
200                         ;
201                 }
                    /* Only INO_STATE_UNCHECKED reaches this point. */
202                 ic->state = INO_STATE_CHECKING;
203                 spin_unlock(&c->inocache_lock);
204
205                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206
207                 ret = jffs2_do_crccheck_inode(c, ic);
208                 if (ret)
209                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210
                    /* The inode is marked checked even if the check returned
                       an error; the error is still passed to the caller. */
211                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212                 up(&c->alloc_sem);
213                 return ret;
214         }
215
216         /* First, work out which block we're garbage-collecting */
217         jeb = c->gcblock;
218
219         if (!jeb)
220                 jeb = jffs2_find_gc_block(c);
221
222         if (!jeb) {
223                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224                 spin_unlock(&c->erase_completion_lock);
225                 up(&c->alloc_sem);
226                 return -EIO;
227         }
228
229         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230         D1(if (c->nextblock)
231            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232
            /* Nothing live left in the block. erase_completion_lock is still
               held here, so jump to 'eraseit' (which expects the lock held)
               rather than 'eraseit_lock'. */
233         if (!jeb->used_size) {
234                 up(&c->alloc_sem);
235                 goto eraseit;
236         }
237
238         raw = jeb->gc_node;
239
            /* Advance the block's GC cursor past nodes already obsolete. */
240         while(ref_obsolete(raw)) {
241                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242                 raw = raw->next_phys;
243                 if (unlikely(!raw)) {
244                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
245                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
246                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247                         jeb->gc_node = raw;
248                         spin_unlock(&c->erase_completion_lock);
249                         up(&c->alloc_sem);
250                         BUG();
251                 }
252         }
253         jeb->gc_node = raw;
254
255         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256
257         if (!raw->next_in_ino) {
258                 /* Inode-less node. Clean marker, snapshot or something like that */
259                 /* FIXME: If it's something that needs to be copied, including something
260                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
261                 spin_unlock(&c->erase_completion_lock);
262                 jffs2_mark_node_obsolete(c, raw);
263                 up(&c->alloc_sem);
264                 goto eraseit_lock;
265         }
266
267         ic = jffs2_raw_ref_to_ic(raw);
268
269 #ifdef CONFIG_JFFS2_FS_XATTR
270         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
271          * We can decide whether this node is inode or xattr by ic->class.     */
272         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
273             || ic->class == RAWNODE_CLASS_XATTR_REF) {
274                 BUG_ON(raw->next_in_ino != (void *)ic);
275                 spin_unlock(&c->erase_completion_lock);
276
277                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
278                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
279                 } else {
280                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
281                 }
282                 goto release_sem;
283         }
284 #endif
285
286         /* We need to hold the inocache. Either the erase_completion_lock or
287            the inocache_lock are sufficient; we trade down since the inocache_lock
288            causes less contention. */
289         spin_lock(&c->inocache_lock);
290
291         spin_unlock(&c->erase_completion_lock);
292
293         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
294
295         /* Three possibilities:
296            1. Inode is already in-core. We must iget it and do proper
297               updating to its fragtree, etc.
298            2. Inode is not in-core, node is REF_PRISTINE. We lock the
299               inocache to prevent a read_inode(), copy the node intact.
300            3. Inode is not in-core, node is not pristine. We must iget()
301               and take the slow path.
302         */
303
304         switch(ic->state) {
305         case INO_STATE_CHECKEDABSENT:
306                 /* It's been checked, but it's not currently in-core.
307                    We can just copy any pristine nodes, but have
308                    to prevent anyone else from doing read_inode() while
309                    we're at it, so we set the state accordingly */
310                 if (ref_flags(raw) == REF_PRISTINE)
311                         ic->state = INO_STATE_GC;
312                 else {
313                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
314                                   ic->ino));
315                 }
316                 break;
317
318         case INO_STATE_PRESENT:
319                 /* It's in-core. GC must iget() it. */
320                 break;
321
322         case INO_STATE_UNCHECKED:
323         case INO_STATE_CHECKING:
324         case INO_STATE_GC:
325                 /* Should never happen. We should have finished checking
326                    by the time we actually start doing any GC, and since
327                    we're holding the alloc_sem, no other garbage collection
328                    can happen.
329                 */
330                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
331                        ic->ino, ic->state);
332                 up(&c->alloc_sem);
333                 spin_unlock(&c->inocache_lock);
334                 BUG();
335
336         case INO_STATE_READING:
337                 /* Someone's currently trying to read it. We must wait for
338                    them to finish and then go through the full iget() route
339                    to do the GC. However, sometimes read_inode() needs to get
340                    the alloc_sem() (for marking nodes invalid) so we must
341                    drop the alloc_sem before sleeping. */
342
343                 up(&c->alloc_sem);
344                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
345                           ic->ino, ic->state));
346                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
347                 /* And because we dropped the alloc_sem we must start again from the
348                    beginning. Ponder chance of livelock here -- we're returning success
349                    without actually making any progress.
350
351                    Q: What are the chances that the inode is back in INO_STATE_READING
352                    again by the time we next enter this function? And that this happens
353                    enough times to cause a real delay?
354
355                    A: Small enough that I don't care :)
356                 */
357                 return 0;
358         }
359
360         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
361            node intact, and we don't have to muck about with the fragtree etc.
362            because we know it's not in-core. If it _was_ in-core, we go through
363            all the iget() crap anyway */
364
365         if (ic->state == INO_STATE_GC) {
366                 spin_unlock(&c->inocache_lock);
367
368                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
369
                    /* Whatever the outcome, put the inode back to
                       CHECKEDABSENT and wake anyone sleeping on inocache_wq. */
370                 spin_lock(&c->inocache_lock);
371                 ic->state = INO_STATE_CHECKEDABSENT;
372                 wake_up(&c->inocache_wq);
373
                    /* -EBADFD is the pristine helper's way of saying the node
                       could not be copied as-is; anything else (success or a
                       real error) ends this pass. */
374                 if (ret != -EBADFD) {
375                         spin_unlock(&c->inocache_lock);
376                         goto release_sem;
377                 }
378
379                 /* Fall through if it wanted us to, with inocache_lock held */
380         }
381
382         /* Prevent the fairly unlikely race where the gcblock is
383            entirely obsoleted by the final close of a file which had
384            the only valid nodes in the block, followed by erasure,
385            followed by freeing of the ic because the erased block(s)
386            held _all_ the nodes of that inode.... never been seen but
387            it's vaguely possible. */
388
            /* Copy what we need out of 'ic' while inocache_lock is still
               held; 'ic' is not touched again after the unlock. */
389         inum = ic->ino;
390         nlink = ic->nlink;
391         spin_unlock(&c->inocache_lock);
392
393         f = jffs2_gc_fetch_inode(c, inum, nlink);
394         if (IS_ERR(f)) {
395                 ret = PTR_ERR(f);
396                 goto release_sem;
397         }
398         if (!f) {
399                 ret = 0;
400                 goto release_sem;
401         }
402
403         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
404
405         jffs2_gc_release_inode(c, f);
406
            /* Exit ladder: 'release_sem' still owns alloc_sem; 'eraseit_lock'
               has released it but does not hold erase_completion_lock;
               'eraseit' is entered with erase_completion_lock held. */
407  release_sem:
408         up(&c->alloc_sem);
409
410  eraseit_lock:
411         /* If we've finished this block, start it erasing */
412         spin_lock(&c->erase_completion_lock);
413
414  eraseit:
415         if (c->gcblock && !c->gcblock->used_size) {
416                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
417                 /* We're GC'ing an empty block? */
418                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
419                 c->gcblock = NULL;
420                 c->nr_erasing_blocks++;
421                 jffs2_erase_pending_trigger(c);
422         }
423         spin_unlock(&c->erase_completion_lock);
424
425         return ret;
426 }
427
/*
 * jffs2_garbage_collect_live
 * Garbage-collect one node ('raw', living in eraseblock 'jeb') that belongs
 * to the in-core inode 'f'.
 *
 * f->sem is held for the duration of the call. Under erase_completion_lock
 * the function first re-checks that 'jeb' is still c->gcblock and that 'raw'
 * has not been obsoleted in the meantime; if either check fails it returns 0
 * without doing anything and the caller is expected to try again.
 *
 * The node is then located in the inode's in-core structures and handed to
 * the matching helper:
 *   - f->metadata          -> jffs2_garbage_collect_metadata()
 *   - a fragtree data node -> jffs2_garbage_collect_pristine() when the ref is
 *                             REF_PRISTINE, falling back (on -EBADFD) to
 *                             jffs2_garbage_collect_hole() or
 *                             jffs2_garbage_collect_dnode()
 *   - an entry in f->dents -> jffs2_garbage_collect_dirent() or, when the
 *                             dirent's ino is zero,
 *                             jffs2_garbage_collect_deletion_dirent()
 *
 * Returns 0 or the error code produced by the helper. A non-obsolete node
 * that cannot be found anywhere is treated as fatal (BUG()).
 */
428 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
429                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
430 {
431         struct jffs2_node_frag *frag;
432         struct jffs2_full_dnode *fn = NULL;
433         struct jffs2_full_dirent *fd;
434         uint32_t start = 0, end = 0, nrfrags = 0;
435         int ret = 0;
436
437         down(&f->sem);
438
439         /* Now we have the lock for this inode. Check that it's still the one at the head
440            of the list. */
441
442         spin_lock(&c->erase_completion_lock);
443
444         if (c->gcblock != jeb) {
445                 spin_unlock(&c->erase_completion_lock);
446                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
447                 goto upnout;
448         }
449         if (ref_obsolete(raw)) {
450                 spin_unlock(&c->erase_completion_lock);
451                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
452                 /* They'll call again */
453                 goto upnout;
454         }
455         spin_unlock(&c->erase_completion_lock);
456
457         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
458         if (f->metadata && f->metadata->raw == raw) {
459                 fn = f->metadata;
460                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
461                 goto upnout;
462         }
463
            /* Walk the fragtree collecting every fragment backed by 'raw'.
               'start' is the offset of the first such fragment, 'end' the end
               of the last one seen; 'fn' is the data node they all share. */
464         /* FIXME. Read node and do lookup? */
465         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
466                 if (frag->node && frag->node->raw == raw) {
467                         fn = frag->node;
468                         end = frag->ofs + frag->size;
469                         if (!nrfrags++)
470                                 start = frag->ofs;
471                         if (nrfrags == frag->node->frags)
472                                 break; /* We've found them all */
473                 }
474         }
475         if (fn) {
476                 if (ref_flags(raw) == REF_PRISTINE) {
477                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
478                         if (!ret) {
479                                 /* Urgh. Return it sensibly. */
                                    /* The pristine helper put the new ref at
                                       the head of the inocache's node list.
                                       Update through 'fn', not through the
                                       loop cursor: 'frag' is NULL if the walk
                                       above ran off the end of the tree
                                       without hitting the break, whereas 'fn'
                                       is the same node and known non-NULL. */
480                                 fn->raw = f->inocache->nodes;
481                         }
                            /* -EBADFD: node could not be copied untouched, so
                               fall back to rewriting it below. */
482                         if (ret != -EBADFD)
483                                 goto upnout;
484                 }
485                 /* We found a datanode. Do the GC */
486                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
487                         /* It crosses a page boundary. Therefore, it must be a hole. */
488                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
489                 } else {
490                         /* It could still be a hole. But we GC the page this way anyway */
491                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
492                 }
493                 goto upnout;
494         }
495
496         /* Wasn't a dnode. Try dirent */
497         for (fd = f->dents; fd; fd=fd->next) {
498                 if (fd->raw == raw)
499                         break;
500         }
501
502         if (fd && fd->ino) {
503                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
504         } else if (fd) {
                    /* A dirent whose ino is zero is handled as a deletion
                       dirent. */
505                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
506         } else {
507                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
508                        ref_offset(raw), f->inocache->ino);
509                 if (ref_obsolete(raw)) {
510                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
511                 } else {
512                         jffs2_dbg_dump_node(c, ref_offset(raw));
513                         BUG();
514                 }
515         }
516  upnout:
517         up(&f->sem);
518
519         return ret;
520 }
521
/*
 * jffs2_garbage_collect_pristine
 * Move a REF_PRISTINE node to freshly reserved flash space without
 * rebuilding it: the raw bytes are read, CRC-checked and written back
 * unchanged.
 *
 * @c:   filesystem being collected; c->gcblock is the block 'raw' lives in
 * @ic:  inode cache the node belongs to; the new ref is pushed onto the head
 *       of ic->nodes
 * @raw: the node to move; marked obsolete on success
 *
 * Only JFFS2_NODETYPE_INODE and JFFS2_NODETYPE_DIRENT nodes are handled.
 *
 * Returns 0 on success, or:
 *   -EBADFD  the node cannot be copied as-is (reserved space too small, a CRC
 *            mismatch, or an unknown node type); the caller is expected to
 *            fall back to the slow path;
 *   -ENOMEM  a buffer or node ref could not be allocated;
 *   -EIO     short read, or the write failed without an error code;
 *   any error returned by jffs2_reserve_space_gc(), jffs2_flash_read() or
 *   jffs2_flash_write().
 *
 * A failed write is retried once in newly reserved space.
 */
522 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
523                                           struct jffs2_inode_cache *ic,
524                                           struct jffs2_raw_node_ref *raw)
525 {
526         union jffs2_node_union *node;
527         struct jffs2_raw_node_ref *nraw;
528         size_t retlen;
529         int ret;
530         uint32_t phys_ofs, alloclen;
531         uint32_t crc, rawlen;
532         int retried = 0;
533
534         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
535
536         rawlen = ref_totlen(c, c->gcblock, raw);
537
538         /* Ask for a small amount of space (or the totlen if smaller) because we
539            don't want to force wastage of the end of a block if splitting would
540            work. */
541         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
542                                 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
543                                 /* this is not the exact summary size of it,
544                                         it is only an upper estimation */
545
546         if (ret)
547                 return ret;
548
549         if (alloclen < rawlen) {
550                 /* Doesn't fit untouched. We'll go the old route and split it */
551                 return -EBADFD;
552         }
553
554         node = kmalloc(rawlen, GFP_KERNEL);
555         if (!node)
556                 return -ENOMEM;
557
558         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
559         if (!ret && retlen != rawlen)
560                 ret = -EIO;
561         if (ret)
562                 goto out_node;
563
            /* The header CRC covers the common node header minus the
               trailing 4-byte hdr_crc field itself. */
564         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
565         if (je32_to_cpu(node->u.hdr_crc) != crc) {
566                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
567                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
568                 goto bail;
569         }
570
            /* Per-type checks: the fixed part of the node first, then its
               variable-length payload (file data or dirent name). */
571         switch(je16_to_cpu(node->u.nodetype)) {
572         case JFFS2_NODETYPE_INODE:
573                 crc = crc32(0, node, sizeof(node->i)-8);
574                 if (je32_to_cpu(node->i.node_crc) != crc) {
575                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
576                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
577                         goto bail;
578                 }
579
580                 if (je32_to_cpu(node->i.dsize)) {
581                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
582                         if (je32_to_cpu(node->i.data_crc) != crc) {
583                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
584                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
585                                 goto bail;
586                         }
587                 }
588                 break;
589
590         case JFFS2_NODETYPE_DIRENT:
591                 crc = crc32(0, node, sizeof(node->d)-8);
592                 if (je32_to_cpu(node->d.node_crc) != crc) {
593                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
594                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
595                         goto bail;
596                 }
597
598                 if (node->d.nsize) {
599                         crc = crc32(0, node->d.name, node->d.nsize);
600                         if (je32_to_cpu(node->d.name_crc) != crc) {
601                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
602                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
603                                 goto bail;
604                         }
605                 }
606                 break;
607         default:
608                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
609                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
610                 goto bail;
611         }
612
613         nraw = jffs2_alloc_raw_node_ref();
614         if (!nraw) {
615                 ret = -ENOMEM;
616                 goto out_node;
617         }
618
619         /* OK, all the CRCs are good; this node can just be copied as-is. */
620  retry:
621         nraw->flash_offset = phys_ofs;
622         nraw->__totlen = rawlen;
623         nraw->next_phys = NULL;
624
625         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
626
627         if (ret || (retlen != rawlen)) {
628                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
629                        rawlen, phys_ofs, ret, retlen);
                    /* Dispose of the ref for the failed write. Either it is
                       handed over to the block's physical node list (as an
                       obsolete node, so the space is accounted as dirty) or
                       it is freed. In both cases 'nraw' must not be used or
                       freed again after this if/else. */
630                 if (retlen) {
631                         /* Doesn't belong to any inode */
632                         nraw->next_in_ino = NULL;
633
634                         nraw->flash_offset |= REF_OBSOLETE;
635                         jffs2_add_physical_node_ref(c, nraw);
636                         jffs2_mark_node_obsolete(c, nraw);
637                 } else {
638                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
639                         jffs2_free_raw_node_ref(nraw);
640                 }
641                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
642                         /* Try to reallocate space and retry */
643                         uint32_t dummy;
644                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
645
646                         retried = 1;
647
648                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
649
650                         jffs2_dbg_acct_sanity_check(c,jeb);
651                         jffs2_dbg_acct_paranoia_check(c, jeb);
652
653                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
654                                                 /* this is not the exact summary size of it,
655                                                         it is only an upper estimation */
656
657                         if (!ret) {
658                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
659
660                                 jffs2_dbg_acct_sanity_check(c,jeb);
661                                 jffs2_dbg_acct_paranoia_check(c, jeb);
662
663                                 goto retry;
664                         }
665                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
                            /* The freshly allocated ref was never used. */
666                         jffs2_free_raw_node_ref(nraw);
667                 }
668
669                 /* No jffs2_free_raw_node_ref(nraw) here: by this point the ref
                       has been freed above, handed to the block's node list, or
                       was never allocated (nraw == NULL after a failed
                       re-allocation). Freeing it again would be a double free
                       or would release a ref that is still linked. */
670                 if (!ret)
671                         ret = -EIO;
672                 goto out_node;
673         }
674         nraw->flash_offset |= REF_PRISTINE;
675         jffs2_add_physical_node_ref(c, nraw);
676
677         /* Link into per-inode list. This is safe because of the ic
678            state being INO_STATE_GC. Note that if we're doing this
679            for an inode which is in-core, the 'nraw' pointer is then
680            going to be fetched from ic->nodes by our caller. */
681         spin_lock(&c->erase_completion_lock);
682         nraw->next_in_ino = ic->nodes;
683         ic->nodes = nraw;
684         spin_unlock(&c->erase_completion_lock);
685
686         jffs2_mark_node_obsolete(c, raw);
687         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
688
            /* Common exit: 'ret' is 0 on the success path that falls in here. */
689  out_node:
690         kfree(node);
691         return ret;
692  bail:
693         ret = -EBADFD;
694         goto out_node;
695 }
696
697 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
698                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
699 {
700         struct jffs2_full_dnode *new_fn;
701         struct jffs2_raw_inode ri;
702         struct jffs2_node_frag *last_frag;
703         jint16_t dev;
704         char *mdata = NULL, mdatalen = 0;
705         uint32_t alloclen, phys_ofs, ilen;
706         int ret;
707
708         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
709             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
710                 /* For these, we don't actually need to read the old node */
711                 /* FIXME: for minor or major > 255. */
712                 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
713                         JFFS2_F_I_RDEV_MIN(f)));
714                 mdata = (char *)&dev;
715                 mdatalen = sizeof(dev);
716                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
717         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
718                 mdatalen = fn->size;
719                 mdata = kmalloc(fn->size, GFP_KERNEL);
720                 if (!mdata) {
721                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
722                         return -ENOMEM;
723                 }
724                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
725                 if (ret) {
726                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
727                         kfree(mdata);
728                         return ret;
729                 }
730                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
731
732         }
733
734         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
735                                 JFFS2_SUMMARY_INODE_SIZE);
736         if (ret) {
737                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
738                        sizeof(ri)+ mdatalen, ret);
739                 goto out;
740         }
741
742         last_frag = frag_last(&f->fragtree);
743         if (last_frag)
744                 /* Fetch the inode length from the fragtree rather then
745                  * from i_size since i_size may have not been updated yet */
746                 ilen = last_frag->ofs + last_frag->size;
747         else
748                 ilen = JFFS2_F_I_SIZE(f);
749
750         memset(&ri, 0, sizeof(ri));
751         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
752         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
753         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
754         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
755
756         ri.ino = cpu_to_je32(f->inocache->ino);
757         ri.version = cpu_to_je32(++f->highest_version);
758         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
759         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
760         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
761         ri.isize = cpu_to_je32(ilen);
762         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
763         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
764         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
765         ri.offset = cpu_to_je32(0);
766         ri.csize = cpu_to_je32(mdatalen);
767         ri.dsize = cpu_to_je32(mdatalen);
768         ri.compr = JFFS2_COMPR_NONE;
769         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
770         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
771
772         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
773
774         if (IS_ERR(new_fn)) {
775                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
776                 ret = PTR_ERR(new_fn);
777                 goto out;
778         }
779         jffs2_mark_node_obsolete(c, fn->raw);
780         jffs2_free_full_dnode(fn);
781         f->metadata = new_fn;
782  out:
783         if (S_ISLNK(JFFS2_F_I_MODE(f)))
784                 kfree(mdata);
785         return ret;
786 }
787
788 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
789                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
790 {
791         struct jffs2_full_dirent *new_fd;
792         struct jffs2_raw_dirent rd;
793         uint32_t alloclen, phys_ofs;
794         int ret;
795
796         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
797         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
798         rd.nsize = strlen(fd->name);
799         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
800         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
801
802         rd.pino = cpu_to_je32(f->inocache->ino);
803         rd.version = cpu_to_je32(++f->highest_version);
804         rd.ino = cpu_to_je32(fd->ino);
805         /* If the times on this inode were set by explicit utime() they can be different,
806            so refrain from splatting them. */
807         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
808                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
809         else
810                 rd.mctime = cpu_to_je32(0);
811         rd.type = fd->type;
812         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
813         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
814
815         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
816                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
817         if (ret) {
818                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
819                        sizeof(rd)+rd.nsize, ret);
820                 return ret;
821         }
822         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
823
824         if (IS_ERR(new_fd)) {
825                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
826                 return PTR_ERR(new_fd);
827         }
828         jffs2_add_fd_to_list(c, new_fd, &f->dents);
829         return 0;
830 }
831
832 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
833                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
834 {
835         struct jffs2_full_dirent **fdp = &f->dents;
836         int found = 0;
837
838         /* On a medium where we can't actually mark nodes obsolete
839            pernamently, such as NAND flash, we need to work out
840            whether this deletion dirent is still needed to actively
841            delete a 'real' dirent with the same name that's still
842            somewhere else on the flash. */
843         if (!jffs2_can_mark_obsolete(c)) {
844                 struct jffs2_raw_dirent *rd;
845                 struct jffs2_raw_node_ref *raw;
846                 int ret;
847                 size_t retlen;
848                 int name_len = strlen(fd->name);
849                 uint32_t name_crc = crc32(0, fd->name, name_len);
850                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
851
852                 rd = kmalloc(rawlen, GFP_KERNEL);
853                 if (!rd)
854                         return -ENOMEM;
855
856                 /* Prevent the erase code from nicking the obsolete node refs while
857                    we're looking at them. I really don't like this extra lock but
858                    can't see any alternative. Suggestions on a postcard to... */
859                 down(&c->erase_free_sem);
860
861                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
862
863                         /* We only care about obsolete ones */
864                         if (!(ref_obsolete(raw)))
865                                 continue;
866
867                         /* Any dirent with the same name is going to have the same length... */
868                         if (ref_totlen(c, NULL, raw) != rawlen)
869                                 continue;
870
871                         /* Doesn't matter if there's one in the same erase block. We're going to
872                            delete it too at the same time. */
873                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
874                                 continue;
875
876                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
877
878                         /* This is an obsolete node belonging to the same directory, and it's of the right
879                            length. We need to take a closer look...*/
880                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
881                         if (ret) {
882                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
883                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
884                                 continue;
885                         }
886                         if (retlen != rawlen) {
887                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
888                                        retlen, rawlen, ref_offset(raw));
889                                 continue;
890                         }
891
892                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
893                                 continue;
894
895                         /* If the name CRC doesn't match, skip */
896                         if (je32_to_cpu(rd->name_crc) != name_crc)
897                                 continue;
898
899                         /* If the name length doesn't match, or it's another deletion dirent, skip */
900                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
901                                 continue;
902
903                         /* OK, check the actual name now */
904                         if (memcmp(rd->name, fd->name, name_len))
905                                 continue;
906
907                         /* OK. The name really does match. There really is still an older node on
908                            the flash which our deletion dirent obsoletes. So we have to write out
909                            a new deletion dirent to replace it */
910                         up(&c->erase_free_sem);
911
912                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
913                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
914                         kfree(rd);
915
916                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
917                 }
918
919                 up(&c->erase_free_sem);
920                 kfree(rd);
921         }
922
923         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
924            we should update the metadata node with those times accordingly */
925
926         /* No need for it any more. Just mark it obsolete and remove it from the list */
927         while (*fdp) {
928                 if ((*fdp) == fd) {
929                         found = 1;
930                         *fdp = fd->next;
931                         break;
932                 }
933                 fdp = &(*fdp)->next;
934         }
935         if (!found) {
936                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
937         }
938         jffs2_mark_node_obsolete(c, fd->raw);
939         jffs2_free_full_dirent(fd);
940         return 0;
941 }
942
943 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
944                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
945                                       uint32_t start, uint32_t end)
946 {
947         struct jffs2_raw_inode ri;
948         struct jffs2_node_frag *frag;
949         struct jffs2_full_dnode *new_fn;
950         uint32_t alloclen, phys_ofs, ilen;
951         int ret;
952
953         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
954                   f->inocache->ino, start, end));
955
956         memset(&ri, 0, sizeof(ri));
957
958         if(fn->frags > 1) {
959                 size_t readlen;
960                 uint32_t crc;
961                 /* It's partially obsoleted by a later write. So we have to
962                    write it out again with the _same_ version as before */
963                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
964                 if (readlen != sizeof(ri) || ret) {
965                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
966                         goto fill;
967                 }
968                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
969                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
970                                ref_offset(fn->raw),
971                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
972                         return -EIO;
973                 }
974                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
975                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
976                                ref_offset(fn->raw),
977                                je32_to_cpu(ri.totlen), sizeof(ri));
978                         return -EIO;
979                 }
980                 crc = crc32(0, &ri, sizeof(ri)-8);
981                 if (crc != je32_to_cpu(ri.node_crc)) {
982                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
983                                ref_offset(fn->raw),
984                                je32_to_cpu(ri.node_crc), crc);
985                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
986                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
987                                start, end, f->inocache->ino);
988                         goto fill;
989                 }
990                 if (ri.compr != JFFS2_COMPR_ZERO) {
991                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
992                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
993                                start, end, f->inocache->ino);
994                         goto fill;
995                 }
996         } else {
997         fill:
998                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
999                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1000                 ri.totlen = cpu_to_je32(sizeof(ri));
1001                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1002
1003                 ri.ino = cpu_to_je32(f->inocache->ino);
1004                 ri.version = cpu_to_je32(++f->highest_version);
1005                 ri.offset = cpu_to_je32(start);
1006                 ri.dsize = cpu_to_je32(end - start);
1007                 ri.csize = cpu_to_je32(0);
1008                 ri.compr = JFFS2_COMPR_ZERO;
1009         }
1010
1011         frag = frag_last(&f->fragtree);
1012         if (frag)
1013                 /* Fetch the inode length from the fragtree rather then
1014                  * from i_size since i_size may have not been updated yet */
1015                 ilen = frag->ofs + frag->size;
1016         else
1017                 ilen = JFFS2_F_I_SIZE(f);
1018
1019         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1020         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1021         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1022         ri.isize = cpu_to_je32(ilen);
1023         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1024         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1025         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1026         ri.data_crc = cpu_to_je32(0);
1027         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1028
1029         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1030                                 JFFS2_SUMMARY_INODE_SIZE);
1031         if (ret) {
1032                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1033                        sizeof(ri), ret);
1034                 return ret;
1035         }
1036         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1037
1038         if (IS_ERR(new_fn)) {
1039                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1040                 return PTR_ERR(new_fn);
1041         }
1042         if (je32_to_cpu(ri.version) == f->highest_version) {
1043                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1044                 if (f->metadata) {
1045                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1046                         jffs2_free_full_dnode(f->metadata);
1047                         f->metadata = NULL;
1048                 }
1049                 return 0;
1050         }
1051
1052         /*
1053          * We should only get here in the case where the node we are
1054          * replacing had more than one frag, so we kept the same version
1055          * number as before. (Except in case of error -- see 'goto fill;'
1056          * above.)
1057          */
1058         D1(if(unlikely(fn->frags <= 1)) {
1059                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1060                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1061                        je32_to_cpu(ri.ino));
1062         });
1063
1064         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1065         mark_ref_normal(new_fn->raw);
1066
1067         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1068              frag; frag = frag_next(frag)) {
1069                 if (frag->ofs > fn->size + fn->ofs)
1070                         break;
1071                 if (frag->node == fn) {
1072                         frag->node = new_fn;
1073                         new_fn->frags++;
1074                         fn->frags--;
1075                 }
1076         }
1077         if (fn->frags) {
1078                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1079                 BUG();
1080         }
1081         if (!new_fn->frags) {
1082                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1083                 BUG();
1084         }
1085
1086         jffs2_mark_node_obsolete(c, fn->raw);
1087         jffs2_free_full_dnode(fn);
1088
1089         return 0;
1090 }
1091
/*
 * Garbage-collect a data node by rewriting the data in [start, end) of
 * inode 'f' as one or more new data nodes.
 *
 * When the number of free plus erasing blocks exceeds
 * c->resv_blocks_gcmerge, the range is first widened -- never beyond the
 * page-sized window containing 'start' -- to swallow logically adjacent
 * frags, so that fragmented pages get merged. The data is taken from the
 * page returned by jffs2_gc_fetch_page() and written out with
 * jffs2_write_dnode(); each new node is added to the inode with
 * jffs2_add_full_dnode_to_inode().
 *
 * Note that the write loop runs while offset < orig_end (the caller's end),
 * while each node is offered data up to the possibly widened 'end'.
 *
 * 'jeb' and 'fn' are not referenced in the body (the inner 'jeb' locals
 * shadow the parameter).
 *
 * Returns the error from jffs2_gc_fetch_page(), jffs2_reserve_space_gc()
 * or jffs2_write_dnode() if one of them fails; otherwise the value returned
 * by the last jffs2_add_full_dnode_to_inode() call (0 if the loop body
 * never ran).
 */
1092 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1093                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1094                                        uint32_t start, uint32_t end)
1095 {
1096         struct jffs2_full_dnode *new_fn;
1097         struct jffs2_raw_inode ri;
1098         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1099         int ret = 0;
1100         unsigned char *comprbuf = NULL, *writebuf;
1101         unsigned long pg;
1102         unsigned char *pg_ptr;
1103
1104         memset(&ri, 0, sizeof(ri));
1105
1106         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1107                   f->inocache->ino, start, end));
1108
             /* Remember the caller's range; start/end may be widened below. */
1109         orig_end = end;
1110         orig_start = start;
1111
1112         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1113                 /* Attempt to do some merging. But only expand to cover logically
1114                    adjacent frags if the block containing them is already considered
1115                    to be dirty. Otherwise we end up with GC just going round in
1116                    circles dirtying the nodes it already wrote out, especially
1117                    on NAND where we have small eraseblocks and hence a much higher
1118                    chance of nodes having to be split to cross boundaries. */
1119
1120                 struct jffs2_node_frag *frag;
1121                 uint32_t min, max;
1122
                     /* [min, max) is the page-sized window containing 'start';
                        both expansion loops below stay inside it. */
1123                 min = start & ~(PAGE_CACHE_SIZE-1);
1124                 max = min + PAGE_CACHE_SIZE;
1125
1126                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1127
1128                 /* BUG_ON(!frag) but that'll happen anyway... */
1129
1130                 BUG_ON(frag->ofs != start);
1131
1132                 /* First grow down... */
1133                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1134
1135                         /* If the previous frag doesn't even reach the beginning, there's
1136                            excessive fragmentation. Just merge. */
1137                         if (frag->ofs > min) {
1138                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1139                                           frag->ofs, frag->ofs+frag->size));
1140                                 start = frag->ofs;
1141                                 continue;
1142                         }
1143                         /* OK. This frag holds the first byte of the page. */
1144                         if (!frag->node || !frag->node->raw) {
1145                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1146                                           frag->ofs, frag->ofs+frag->size));
1147                                 break;
1148                         } else {
1149
1150                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1151                                    in a block which is still considered clean? If so, don't obsolete it.
1152                                    If not, cover it anyway. */
1153
1154                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1155                                 struct jffs2_eraseblock *jeb;
1156
1157                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1158
1159                                 if (jeb == c->gcblock) {
1160                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1161                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1162                                         start = frag->ofs;
1163                                         break;
1164                                 }
1165                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1166                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1167                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1168                                         break;
1169                                 }
1170
1171                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1172                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1173                                 start = frag->ofs;
1174                                 break;
1175                         }
1176                 }
1177
1178                 /* ... then up */
1179
1180                 /* Find last frag which is actually part of the node we're to GC. */
1181                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1182
1183                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1184
1185                         /* If the next frag doesn't even reach the end of the page, there's lots
1186                            of fragmentation. Just merge. */
1187                         if (frag->ofs+frag->size < max) {
1188                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1189                                           frag->ofs, frag->ofs+frag->size));
1190                                 end = frag->ofs + frag->size;
1191                                 continue;
1192                         }
1193
                             /* From here on this frag ends exactly at 'max'. */
1194                         if (!frag->node || !frag->node->raw) {
1195                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1196                                           frag->ofs, frag->ofs+frag->size));
1197                                 break;
1198                         } else {
1199
1200                                 /* OK, it's a frag which extends to the end of the page. Does it live
1201                                    in a block which is still considered clean? If so, don't obsolete it.
1202                                    If not, cover it anyway. */
1203
1204                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1205                                 struct jffs2_eraseblock *jeb;
1206
1207                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1208
1209                                 if (jeb == c->gcblock) {
1210                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1211                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1212                                         end = frag->ofs + frag->size;
1213                                         break;
1214                                 }
1215                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1216                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1217                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1218                                         break;
1219                                 }
1220
1221                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1222                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1223                                 end = frag->ofs + frag->size;
1224                                 break;
1225                         }
1226                 }
1227                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1228                           orig_start, orig_end, start, end));
1229
                     /* Expansion may only ever widen the range, and (in debug
                        builds) must not run past the last frag of the inode. */
1230                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1231                 BUG_ON(end < orig_end);
1232                 BUG_ON(start > orig_start);
1233         }
1234
1235         /* First, use readpage() to read the appropriate page into the page cache */
1236         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1237          *    triggered garbage collection in the first place?
1238          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1239          *    page OK. We'll actually write it out again in commit_write, which is a little
1240          *    suboptimal, but at least we're correct.
1241          */
1242         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1243
1244         if (IS_ERR(pg_ptr)) {
1245                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1246                 return PTR_ERR(pg_ptr);
1247         }
1248
             /* Write nodes until the caller's original range is covered. The
                test is against orig_end, not the widened 'end', although each
                node is offered data up to 'end'. */
1249         offset = start;
1250         while(offset < orig_end) {
1251                 uint32_t datalen;
1252                 uint32_t cdatalen;
1253                 uint16_t comprtype = JFFS2_COMPR_NONE;
1254
1255                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1256                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1257
1258                 if (ret) {
1259                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1260                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1261                         break;
1262                 }
                     /* Upper bounds handed to the compressor: output limited by
                        the space just reserved, input by the remaining range.
                        NOTE(review): jffs2_compress() presumably updates both
                        through the pointers to the amounts actually produced and
                        consumed -- they are used that way below; confirm. */
1263                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1264                 datalen = end - offset;
1265
1266                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1267
1268                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1269
1270                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1271                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1272                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1273                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1274
1275                 ri.ino = cpu_to_je32(f->inocache->ino);
1276                 ri.version = cpu_to_je32(++f->highest_version);
1277                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1278                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1279                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1280                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1281                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1282                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1283                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1284                 ri.offset = cpu_to_je32(offset);
1285                 ri.csize = cpu_to_je32(cdatalen);
1286                 ri.dsize = cpu_to_je32(datalen);
                     /* comprtype carries two bytes: low byte -> compr,
                        high byte -> usercompr. */
1287                 ri.compr = comprtype & 0xff;
1288                 ri.usercompr = (comprtype >> 8) & 0xff;
1289                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1290                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1291
1292                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1293
1294                 jffs2_free_comprbuf(comprbuf, writebuf);
1295
1296                 if (IS_ERR(new_fn)) {
1297                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1298                         ret = PTR_ERR(new_fn);
1299                         break;
1300                 }
                     /* NOTE(review): ret is not tested here; if another
                        iteration follows it is overwritten by the next
                        jffs2_reserve_space_gc() result. */
1301                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1302                 offset += datalen;
                     /* Drop the separate metadata node, if any, now that a new
                        data node has been written. */
1303                 if (f->metadata) {
1304                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1305                         jffs2_free_full_dnode(f->metadata);
1306                         f->metadata = NULL;
1307                 }
1308         }
1309
1310         jffs2_gc_release_page(c, pg_ptr, &pg);
1311         return ret;
1312 }