[JFFS2] Improve garbage collector block selection
[linux-2.6] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: gc.c,v 1.146 2005/03/20 17:45:25 dedekind Exp $
11  *
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/crc32.h>
19 #include <linux/compiler.h>
20 #include <linux/stat.h>
21 #include "nodelist.h"
22 #include "compr.h"
23
24 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
25                                           struct jffs2_inode_cache *ic,
26                                           struct jffs2_raw_node_ref *raw);
27 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
28                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
29 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
32                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35                                       uint32_t start, uint32_t end);
36 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38                                        uint32_t start, uint32_t end);
39 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
40                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42 /* Called with erase_completion_lock held */
43 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44 {
45         struct jffs2_eraseblock *ret;
46         struct list_head *nextlist = NULL;
47         int n = jiffies % 128;
48
49         /* Pick an eraseblock to garbage collect next. This is where we'll
50            put the clever wear-levelling algorithms. Eventually.  */
51         /* We possibly want to favour the dirtier blocks more when the
52            number of free blocks is low. */
53 again:
54         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56                 nextlist = &c->bad_used_list;
57         } else if (n < 50 && !list_empty(&c->erasable_list)) {
58                 /* Note that most of them will have gone directly to be erased. 
59                    So don't favour the erasable_list _too_ much. */
60                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61                 nextlist = &c->erasable_list;
62         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63                 /* Most of the time, pick one off the very_dirty list */
64                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65                 nextlist = &c->very_dirty_list;
66         } else if (n < 126 && !list_empty(&c->dirty_list)) {
67                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68                 nextlist = &c->dirty_list;
69         } else if (!list_empty(&c->clean_list)) {
70                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71                 nextlist = &c->clean_list;
72         } else if (!list_empty(&c->dirty_list)) {
73                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75                 nextlist = &c->dirty_list;
76         } else if (!list_empty(&c->very_dirty_list)) {
77                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78                 nextlist = &c->very_dirty_list;
79         } else if (!list_empty(&c->erasable_list)) {
80                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82                 nextlist = &c->erasable_list;
83         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84                 /* There are blocks are wating for the wbuf sync */
85                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
86                 jffs2_flush_wbuf_pad(c);
87                 goto again;
88         } else {
89                 /* Eep. All were empty */
90                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91                 return NULL;
92         }
93
94         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95         list_del(&ret->list);
96         c->gcblock = ret;
97         ret->gc_node = ret->first_node;
98         if (!ret->gc_node) {
99                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100                 BUG();
101         }
102         
103         /* Have we accidentally picked a clean block with wasted space ? */
104         if (ret->wasted_size) {
105                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106                 ret->dirty_size += ret->wasted_size;
107                 c->wasted_size -= ret->wasted_size;
108                 c->dirty_size += ret->wasted_size;
109                 ret->wasted_size = 0;
110         }
111
112         D2(jffs2_dump_block_lists(c));
113         return ret;
114 }
115
116 /* jffs2_garbage_collect_pass
117  * Make a single attempt to progress GC. Move one node, and possibly
118  * start erasing one eraseblock.
119  */
120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
121 {
122         struct jffs2_inode_info *f;
123         struct jffs2_inode_cache *ic;
124         struct jffs2_eraseblock *jeb;
125         struct jffs2_raw_node_ref *raw;
126         int ret = 0, inum, nlink;
127
128         if (down_interruptible(&c->alloc_sem))
129                 return -EINTR;
130
131         for (;;) {
132                 spin_lock(&c->erase_completion_lock);
133                 if (!c->unchecked_size)
134                         break;
135
136                 /* We can't start doing GC yet. We haven't finished checking
137                    the node CRCs etc. Do it now. */
138                 
139                 /* checked_ino is protected by the alloc_sem */
140                 if (c->checked_ino > c->highest_ino) {
141                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
142                                c->unchecked_size);
143                         D2(jffs2_dump_block_lists(c));
144                         spin_unlock(&c->erase_completion_lock);
145                         BUG();
146                 }
147
148                 spin_unlock(&c->erase_completion_lock);
149
150                 spin_lock(&c->inocache_lock);
151
152                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
153
154                 if (!ic) {
155                         spin_unlock(&c->inocache_lock);
156                         continue;
157                 }
158
159                 if (!ic->nlink) {
160                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
161                                   ic->ino));
162                         spin_unlock(&c->inocache_lock);
163                         continue;
164                 }
165                 switch(ic->state) {
166                 case INO_STATE_CHECKEDABSENT:
167                 case INO_STATE_PRESENT:
168                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
169                         spin_unlock(&c->inocache_lock);
170                         continue;
171
172                 case INO_STATE_GC:
173                 case INO_STATE_CHECKING:
174                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
175                         spin_unlock(&c->inocache_lock);
176                         BUG();
177
178                 case INO_STATE_READING:
179                         /* We need to wait for it to finish, lest we move on
180                            and trigger the BUG() above while we haven't yet 
181                            finished checking all its nodes */
182                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
183                         up(&c->alloc_sem);
184                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
185                         return 0;
186
187                 default:
188                         BUG();
189
190                 case INO_STATE_UNCHECKED:
191                         ;
192                 }
193                 ic->state = INO_STATE_CHECKING;
194                 spin_unlock(&c->inocache_lock);
195
196                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
197
198                 ret = jffs2_do_crccheck_inode(c, ic);
199                 if (ret)
200                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
201
202                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
203                 up(&c->alloc_sem);
204                 return ret;
205         }
206
207         /* First, work out which block we're garbage-collecting */
208         jeb = c->gcblock;
209
210         if (!jeb)
211                 jeb = jffs2_find_gc_block(c);
212
213         if (!jeb) {
214                 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
215                 spin_unlock(&c->erase_completion_lock);
216                 up(&c->alloc_sem);
217                 return -EIO;
218         }
219
220         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
221         D1(if (c->nextblock)
222            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
223
224         if (!jeb->used_size) {
225                 up(&c->alloc_sem);
226                 goto eraseit;
227         }
228
229         raw = jeb->gc_node;
230                         
231         while(ref_obsolete(raw)) {
232                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
233                 raw = raw->next_phys;
234                 if (unlikely(!raw)) {
235                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
236                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n", 
237                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
238                         jeb->gc_node = raw;
239                         spin_unlock(&c->erase_completion_lock);
240                         up(&c->alloc_sem);
241                         BUG();
242                 }
243         }
244         jeb->gc_node = raw;
245
246         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
247
248         if (!raw->next_in_ino) {
249                 /* Inode-less node. Clean marker, snapshot or something like that */
250                 /* FIXME: If it's something that needs to be copied, including something
251                    we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
252                 spin_unlock(&c->erase_completion_lock);
253                 jffs2_mark_node_obsolete(c, raw);
254                 up(&c->alloc_sem);
255                 goto eraseit_lock;
256         }
257
258         ic = jffs2_raw_ref_to_ic(raw);
259
260         /* We need to hold the inocache. Either the erase_completion_lock or
261            the inocache_lock are sufficient; we trade down since the inocache_lock 
262            causes less contention. */
263         spin_lock(&c->inocache_lock);
264
265         spin_unlock(&c->erase_completion_lock);
266
267         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
268
269         /* Three possibilities:
270            1. Inode is already in-core. We must iget it and do proper
271               updating to its fragtree, etc.
272            2. Inode is not in-core, node is REF_PRISTINE. We lock the
273               inocache to prevent a read_inode(), copy the node intact.
274            3. Inode is not in-core, node is not pristine. We must iget()
275               and take the slow path.
276         */
277
278         switch(ic->state) {
279         case INO_STATE_CHECKEDABSENT:
280                 /* It's been checked, but it's not currently in-core. 
281                    We can just copy any pristine nodes, but have
282                    to prevent anyone else from doing read_inode() while
283                    we're at it, so we set the state accordingly */
284                 if (ref_flags(raw) == REF_PRISTINE)
285                         ic->state = INO_STATE_GC;
286                 else {
287                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n", 
288                                   ic->ino));
289                 }
290                 break;
291
292         case INO_STATE_PRESENT:
293                 /* It's in-core. GC must iget() it. */
294                 break;
295
296         case INO_STATE_UNCHECKED:
297         case INO_STATE_CHECKING:
298         case INO_STATE_GC:
299                 /* Should never happen. We should have finished checking
300                    by the time we actually start doing any GC, and since 
301                    we're holding the alloc_sem, no other garbage collection 
302                    can happen.
303                 */
304                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
305                        ic->ino, ic->state);
306                 up(&c->alloc_sem);
307                 spin_unlock(&c->inocache_lock);
308                 BUG();
309
310         case INO_STATE_READING:
311                 /* Someone's currently trying to read it. We must wait for
312                    them to finish and then go through the full iget() route
313                    to do the GC. However, sometimes read_inode() needs to get
314                    the alloc_sem() (for marking nodes invalid) so we must
315                    drop the alloc_sem before sleeping. */
316
317                 up(&c->alloc_sem);
318                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
319                           ic->ino, ic->state));
320                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
321                 /* And because we dropped the alloc_sem we must start again from the 
322                    beginning. Ponder chance of livelock here -- we're returning success
323                    without actually making any progress.
324
325                    Q: What are the chances that the inode is back in INO_STATE_READING 
326                    again by the time we next enter this function? And that this happens
327                    enough times to cause a real delay?
328
329                    A: Small enough that I don't care :) 
330                 */
331                 return 0;
332         }
333
334         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
335            node intact, and we don't have to muck about with the fragtree etc. 
336            because we know it's not in-core. If it _was_ in-core, we go through
337            all the iget() crap anyway */
338
339         if (ic->state == INO_STATE_GC) {
340                 spin_unlock(&c->inocache_lock);
341
342                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
343
344                 spin_lock(&c->inocache_lock);
345                 ic->state = INO_STATE_CHECKEDABSENT;
346                 wake_up(&c->inocache_wq);
347
348                 if (ret != -EBADFD) {
349                         spin_unlock(&c->inocache_lock);
350                         goto release_sem;
351                 }
352
353                 /* Fall through if it wanted us to, with inocache_lock held */
354         }
355
356         /* Prevent the fairly unlikely race where the gcblock is
357            entirely obsoleted by the final close of a file which had
358            the only valid nodes in the block, followed by erasure,
359            followed by freeing of the ic because the erased block(s)
360            held _all_ the nodes of that inode.... never been seen but
361            it's vaguely possible. */
362
363         inum = ic->ino;
364         nlink = ic->nlink;
365         spin_unlock(&c->inocache_lock);
366
367         f = jffs2_gc_fetch_inode(c, inum, nlink);
368         if (IS_ERR(f)) {
369                 ret = PTR_ERR(f);
370                 goto release_sem;
371         }
372         if (!f) {
373                 ret = 0;
374                 goto release_sem;
375         }
376
377         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
378
379         jffs2_gc_release_inode(c, f);
380
381  release_sem:
382         up(&c->alloc_sem);
383
384  eraseit_lock:
385         /* If we've finished this block, start it erasing */
386         spin_lock(&c->erase_completion_lock);
387
388  eraseit:
389         if (c->gcblock && !c->gcblock->used_size) {
390                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
391                 /* We're GC'ing an empty block? */
392                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
393                 c->gcblock = NULL;
394                 c->nr_erasing_blocks++;
395                 jffs2_erase_pending_trigger(c);
396         }
397         spin_unlock(&c->erase_completion_lock);
398
399         return ret;
400 }
401
402 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
403                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
404 {
405         struct jffs2_node_frag *frag;
406         struct jffs2_full_dnode *fn = NULL;
407         struct jffs2_full_dirent *fd;
408         uint32_t start = 0, end = 0, nrfrags = 0;
409         int ret = 0;
410
411         down(&f->sem);
412
413         /* Now we have the lock for this inode. Check that it's still the one at the head
414            of the list. */
415
416         spin_lock(&c->erase_completion_lock);
417
418         if (c->gcblock != jeb) {
419                 spin_unlock(&c->erase_completion_lock);
420                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
421                 goto upnout;
422         }
423         if (ref_obsolete(raw)) {
424                 spin_unlock(&c->erase_completion_lock);
425                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
426                 /* They'll call again */
427                 goto upnout;
428         }
429         spin_unlock(&c->erase_completion_lock);
430
431         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
432         if (f->metadata && f->metadata->raw == raw) {
433                 fn = f->metadata;
434                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
435                 goto upnout;
436         }
437
438         /* FIXME. Read node and do lookup? */
439         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
440                 if (frag->node && frag->node->raw == raw) {
441                         fn = frag->node;
442                         end = frag->ofs + frag->size;
443                         if (!nrfrags++)
444                                 start = frag->ofs;
445                         if (nrfrags == frag->node->frags)
446                                 break; /* We've found them all */
447                 }
448         }
449         if (fn) {
450                 if (ref_flags(raw) == REF_PRISTINE) {
451                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
452                         if (!ret) {
453                                 /* Urgh. Return it sensibly. */
454                                 frag->node->raw = f->inocache->nodes;
455                         }       
456                         if (ret != -EBADFD)
457                                 goto upnout;
458                 }
459                 /* We found a datanode. Do the GC */
460                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
461                         /* It crosses a page boundary. Therefore, it must be a hole. */
462                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
463                 } else {
464                         /* It could still be a hole. But we GC the page this way anyway */
465                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
466                 }
467                 goto upnout;
468         }
469         
470         /* Wasn't a dnode. Try dirent */
471         for (fd = f->dents; fd; fd=fd->next) {
472                 if (fd->raw == raw)
473                         break;
474         }
475
476         if (fd && fd->ino) {
477                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
478         } else if (fd) {
479                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
480         } else {
481                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
482                        ref_offset(raw), f->inocache->ino);
483                 if (ref_obsolete(raw)) {
484                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
485                 } else {
486                         ret = -EIO;
487                 }
488         }
489  upnout:
490         up(&f->sem);
491
492         return ret;
493 }
494
495 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c, 
496                                           struct jffs2_inode_cache *ic,
497                                           struct jffs2_raw_node_ref *raw)
498 {
499         union jffs2_node_union *node;
500         struct jffs2_raw_node_ref *nraw;
501         size_t retlen;
502         int ret;
503         uint32_t phys_ofs, alloclen;
504         uint32_t crc, rawlen;
505         int retried = 0;
506
507         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
508
509         rawlen = ref_totlen(c, c->gcblock, raw);
510
511         /* Ask for a small amount of space (or the totlen if smaller) because we
512            don't want to force wastage of the end of a block if splitting would
513            work. */
514         ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN, 
515                                               rawlen), &phys_ofs, &alloclen);
516         if (ret)
517                 return ret;
518
519         if (alloclen < rawlen) {
520                 /* Doesn't fit untouched. We'll go the old route and split it */
521                 return -EBADFD;
522         }
523
524         node = kmalloc(rawlen, GFP_KERNEL);
525         if (!node)
526                return -ENOMEM;
527
528         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
529         if (!ret && retlen != rawlen)
530                 ret = -EIO;
531         if (ret)
532                 goto out_node;
533
534         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
535         if (je32_to_cpu(node->u.hdr_crc) != crc) {
536                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
537                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
538                 goto bail;
539         }
540
541         switch(je16_to_cpu(node->u.nodetype)) {
542         case JFFS2_NODETYPE_INODE:
543                 crc = crc32(0, node, sizeof(node->i)-8);
544                 if (je32_to_cpu(node->i.node_crc) != crc) {
545                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
546                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
547                         goto bail;
548                 }
549
550                 if (je32_to_cpu(node->i.dsize)) {
551                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
552                         if (je32_to_cpu(node->i.data_crc) != crc) {
553                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
554                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
555                                 goto bail;
556                         }
557                 }
558                 break;
559
560         case JFFS2_NODETYPE_DIRENT:
561                 crc = crc32(0, node, sizeof(node->d)-8);
562                 if (je32_to_cpu(node->d.node_crc) != crc) {
563                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
564                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
565                         goto bail;
566                 }
567
568                 if (node->d.nsize) {
569                         crc = crc32(0, node->d.name, node->d.nsize);
570                         if (je32_to_cpu(node->d.name_crc) != crc) {
571                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
572                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
573                                 goto bail;
574                         }
575                 }
576                 break;
577         default:
578                 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n", 
579                        ref_offset(raw), je16_to_cpu(node->u.nodetype));
580                 goto bail;
581         }
582
583         nraw = jffs2_alloc_raw_node_ref();
584         if (!nraw) {
585                 ret = -ENOMEM;
586                 goto out_node;
587         }
588
589         /* OK, all the CRCs are good; this node can just be copied as-is. */
590  retry:
591         nraw->flash_offset = phys_ofs;
592         nraw->__totlen = rawlen;
593         nraw->next_phys = NULL;
594
595         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
596
597         if (ret || (retlen != rawlen)) {
598                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
599                        rawlen, phys_ofs, ret, retlen);
600                 if (retlen) {
601                         /* Doesn't belong to any inode */
602                         nraw->next_in_ino = NULL;
603
604                         nraw->flash_offset |= REF_OBSOLETE;
605                         jffs2_add_physical_node_ref(c, nraw);
606                         jffs2_mark_node_obsolete(c, nraw);
607                 } else {
608                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
609                         jffs2_free_raw_node_ref(nraw);
610                 }
611                 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
612                         /* Try to reallocate space and retry */
613                         uint32_t dummy;
614                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
615
616                         retried = 1;
617
618                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
619                         
620                         ACCT_SANITY_CHECK(c,jeb);
621                         D1(ACCT_PARANOIA_CHECK(jeb));
622
623                         ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy);
624
625                         if (!ret) {
626                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
627
628                                 ACCT_SANITY_CHECK(c,jeb);
629                                 D1(ACCT_PARANOIA_CHECK(jeb));
630
631                                 goto retry;
632                         }
633                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
634                         jffs2_free_raw_node_ref(nraw);
635                 }
636
637                 jffs2_free_raw_node_ref(nraw);
638                 if (!ret)
639                         ret = -EIO;
640                 goto out_node;
641         }
642         nraw->flash_offset |= REF_PRISTINE;
643         jffs2_add_physical_node_ref(c, nraw);
644
645         /* Link into per-inode list. This is safe because of the ic
646            state being INO_STATE_GC. Note that if we're doing this
647            for an inode which is in-core, the 'nraw' pointer is then
648            going to be fetched from ic->nodes by our caller. */
649         spin_lock(&c->erase_completion_lock);
650         nraw->next_in_ino = ic->nodes;
651         ic->nodes = nraw;
652         spin_unlock(&c->erase_completion_lock);
653
654         jffs2_mark_node_obsolete(c, raw);
655         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
656
657  out_node:
658         kfree(node);
659         return ret;
660  bail:
661         ret = -EBADFD;
662         goto out_node;
663 }
664
665 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
666                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
667 {
668         struct jffs2_full_dnode *new_fn;
669         struct jffs2_raw_inode ri;
670         jint16_t dev;
671         char *mdata = NULL, mdatalen = 0;
672         uint32_t alloclen, phys_ofs;
673         int ret;
674
675         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
676             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
677                 /* For these, we don't actually need to read the old node */
678                 /* FIXME: for minor or major > 255. */
679                 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) | 
680                         JFFS2_F_I_RDEV_MIN(f)));
681                 mdata = (char *)&dev;
682                 mdatalen = sizeof(dev);
683                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
684         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
685                 mdatalen = fn->size;
686                 mdata = kmalloc(fn->size, GFP_KERNEL);
687                 if (!mdata) {
688                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
689                         return -ENOMEM;
690                 }
691                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
692                 if (ret) {
693                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
694                         kfree(mdata);
695                         return ret;
696                 }
697                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
698
699         }
700         
701         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen);
702         if (ret) {
703                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
704                        sizeof(ri)+ mdatalen, ret);
705                 goto out;
706         }
707         
708         memset(&ri, 0, sizeof(ri));
709         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
710         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
711         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
712         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
713
714         ri.ino = cpu_to_je32(f->inocache->ino);
715         ri.version = cpu_to_je32(++f->highest_version);
716         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
717         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
718         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
719         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
720         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
721         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
722         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
723         ri.offset = cpu_to_je32(0);
724         ri.csize = cpu_to_je32(mdatalen);
725         ri.dsize = cpu_to_je32(mdatalen);
726         ri.compr = JFFS2_COMPR_NONE;
727         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
728         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
729
730         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
731
732         if (IS_ERR(new_fn)) {
733                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
734                 ret = PTR_ERR(new_fn);
735                 goto out;
736         }
737         jffs2_mark_node_obsolete(c, fn->raw);
738         jffs2_free_full_dnode(fn);
739         f->metadata = new_fn;
740  out:
741         if (S_ISLNK(JFFS2_F_I_MODE(f)))
742                 kfree(mdata);
743         return ret;
744 }
745
746 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
747                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
748 {
749         struct jffs2_full_dirent *new_fd;
750         struct jffs2_raw_dirent rd;
751         uint32_t alloclen, phys_ofs;
752         int ret;
753
754         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
755         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
756         rd.nsize = strlen(fd->name);
757         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
758         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
759
760         rd.pino = cpu_to_je32(f->inocache->ino);
761         rd.version = cpu_to_je32(++f->highest_version);
762         rd.ino = cpu_to_je32(fd->ino);
763         rd.mctime = cpu_to_je32(max(JFFS2_F_I_MTIME(f), JFFS2_F_I_CTIME(f)));
764         rd.type = fd->type;
765         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
766         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
767         
768         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen);
769         if (ret) {
770                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
771                        sizeof(rd)+rd.nsize, ret);
772                 return ret;
773         }
774         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
775
776         if (IS_ERR(new_fd)) {
777                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
778                 return PTR_ERR(new_fd);
779         }
780         jffs2_add_fd_to_list(c, new_fd, &f->dents);
781         return 0;
782 }
783
784 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 
785                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
786 {
787         struct jffs2_full_dirent **fdp = &f->dents;
788         int found = 0;
789
790         /* On a medium where we can't actually mark nodes obsolete
791            pernamently, such as NAND flash, we need to work out
792            whether this deletion dirent is still needed to actively
793            delete a 'real' dirent with the same name that's still
794            somewhere else on the flash. */
795         if (!jffs2_can_mark_obsolete(c)) {
796                 struct jffs2_raw_dirent *rd;
797                 struct jffs2_raw_node_ref *raw;
798                 int ret;
799                 size_t retlen;
800                 int name_len = strlen(fd->name);
801                 uint32_t name_crc = crc32(0, fd->name, name_len);
802                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
803
804                 rd = kmalloc(rawlen, GFP_KERNEL);
805                 if (!rd)
806                         return -ENOMEM;
807
808                 /* Prevent the erase code from nicking the obsolete node refs while
809                    we're looking at them. I really don't like this extra lock but
810                    can't see any alternative. Suggestions on a postcard to... */
811                 down(&c->erase_free_sem);
812
813                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
814
815                         /* We only care about obsolete ones */
816                         if (!(ref_obsolete(raw)))
817                                 continue;
818
819                         /* Any dirent with the same name is going to have the same length... */
820                         if (ref_totlen(c, NULL, raw) != rawlen)
821                                 continue;
822
823                         /* Doesn't matter if there's one in the same erase block. We're going to 
824                            delete it too at the same time. */
825                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
826                                 continue;
827
828                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
829
830                         /* This is an obsolete node belonging to the same directory, and it's of the right
831                            length. We need to take a closer look...*/
832                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
833                         if (ret) {
834                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
835                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
836                                 continue;
837                         }
838                         if (retlen != rawlen) {
839                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
840                                        retlen, rawlen, ref_offset(raw));
841                                 continue;
842                         }
843
844                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
845                                 continue;
846
847                         /* If the name CRC doesn't match, skip */
848                         if (je32_to_cpu(rd->name_crc) != name_crc)
849                                 continue;
850
851                         /* If the name length doesn't match, or it's another deletion dirent, skip */
852                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
853                                 continue;
854
855                         /* OK, check the actual name now */
856                         if (memcmp(rd->name, fd->name, name_len))
857                                 continue;
858
859                         /* OK. The name really does match. There really is still an older node on
860                            the flash which our deletion dirent obsoletes. So we have to write out
861                            a new deletion dirent to replace it */
862                         up(&c->erase_free_sem);
863
864                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
865                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
866                         kfree(rd);
867
868                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
869                 }
870
871                 up(&c->erase_free_sem);
872                 kfree(rd);
873         }
874
875         /* No need for it any more. Just mark it obsolete and remove it from the list */
876         while (*fdp) {
877                 if ((*fdp) == fd) {
878                         found = 1;
879                         *fdp = fd->next;
880                         break;
881                 }
882                 fdp = &(*fdp)->next;
883         }
884         if (!found) {
885                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
886         }
887         jffs2_mark_node_obsolete(c, fd->raw);
888         jffs2_free_full_dirent(fd);
889         return 0;
890 }
891
892 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
893                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
894                                       uint32_t start, uint32_t end)
895 {
896         struct jffs2_raw_inode ri;
897         struct jffs2_node_frag *frag;
898         struct jffs2_full_dnode *new_fn;
899         uint32_t alloclen, phys_ofs;
900         int ret;
901
902         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
903                   f->inocache->ino, start, end));
904         
905         memset(&ri, 0, sizeof(ri));
906
907         if(fn->frags > 1) {
908                 size_t readlen;
909                 uint32_t crc;
910                 /* It's partially obsoleted by a later write. So we have to 
911                    write it out again with the _same_ version as before */
912                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
913                 if (readlen != sizeof(ri) || ret) {
914                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
915                         goto fill;
916                 }
917                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
918                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
919                                ref_offset(fn->raw),
920                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
921                         return -EIO;
922                 }
923                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
924                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
925                                ref_offset(fn->raw),
926                                je32_to_cpu(ri.totlen), sizeof(ri));
927                         return -EIO;
928                 }
929                 crc = crc32(0, &ri, sizeof(ri)-8);
930                 if (crc != je32_to_cpu(ri.node_crc)) {
931                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
932                                ref_offset(fn->raw), 
933                                je32_to_cpu(ri.node_crc), crc);
934                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
935                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
936                                start, end, f->inocache->ino);
937                         goto fill;
938                 }
939                 if (ri.compr != JFFS2_COMPR_ZERO) {
940                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
941                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n", 
942                                start, end, f->inocache->ino);
943                         goto fill;
944                 }
945         } else {
946         fill:
947                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
948                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
949                 ri.totlen = cpu_to_je32(sizeof(ri));
950                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
951
952                 ri.ino = cpu_to_je32(f->inocache->ino);
953                 ri.version = cpu_to_je32(++f->highest_version);
954                 ri.offset = cpu_to_je32(start);
955                 ri.dsize = cpu_to_je32(end - start);
956                 ri.csize = cpu_to_je32(0);
957                 ri.compr = JFFS2_COMPR_ZERO;
958         }
959         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
960         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
961         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
962         ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
963         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
964         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
965         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
966         ri.data_crc = cpu_to_je32(0);
967         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
968
969         ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen);
970         if (ret) {
971                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
972                        sizeof(ri), ret);
973                 return ret;
974         }
975         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
976
977         if (IS_ERR(new_fn)) {
978                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
979                 return PTR_ERR(new_fn);
980         }
981         if (je32_to_cpu(ri.version) == f->highest_version) {
982                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
983                 if (f->metadata) {
984                         jffs2_mark_node_obsolete(c, f->metadata->raw);
985                         jffs2_free_full_dnode(f->metadata);
986                         f->metadata = NULL;
987                 }
988                 return 0;
989         }
990
991         /* 
992          * We should only get here in the case where the node we are
993          * replacing had more than one frag, so we kept the same version
994          * number as before. (Except in case of error -- see 'goto fill;' 
995          * above.)
996          */
997         D1(if(unlikely(fn->frags <= 1)) {
998                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
999                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1000                        je32_to_cpu(ri.ino));
1001         });
1002
1003         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1004         mark_ref_normal(new_fn->raw);
1005
1006         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs); 
1007              frag; frag = frag_next(frag)) {
1008                 if (frag->ofs > fn->size + fn->ofs)
1009                         break;
1010                 if (frag->node == fn) {
1011                         frag->node = new_fn;
1012                         new_fn->frags++;
1013                         fn->frags--;
1014                 }
1015         }
1016         if (fn->frags) {
1017                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1018                 BUG();
1019         }
1020         if (!new_fn->frags) {
1021                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1022                 BUG();
1023         }
1024                 
1025         jffs2_mark_node_obsolete(c, fn->raw);
1026         jffs2_free_full_dnode(fn);
1027         
1028         return 0;
1029 }
1030
1031 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1032                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1033                                        uint32_t start, uint32_t end)
1034 {
1035         struct jffs2_full_dnode *new_fn;
1036         struct jffs2_raw_inode ri;
1037         uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;      
1038         int ret = 0;
1039         unsigned char *comprbuf = NULL, *writebuf;
1040         unsigned long pg;
1041         unsigned char *pg_ptr;
1042  
1043         memset(&ri, 0, sizeof(ri));
1044
1045         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1046                   f->inocache->ino, start, end));
1047
1048         orig_end = end;
1049         orig_start = start;
1050
1051         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1052                 /* Attempt to do some merging. But only expand to cover logically
1053                    adjacent frags if the block containing them is already considered
1054                    to be dirty. Otherwise we end up with GC just going round in 
1055                    circles dirtying the nodes it already wrote out, especially 
1056                    on NAND where we have small eraseblocks and hence a much higher
1057                    chance of nodes having to be split to cross boundaries. */
1058
1059                 struct jffs2_node_frag *frag;
1060                 uint32_t min, max;
1061
1062                 min = start & ~(PAGE_CACHE_SIZE-1);
1063                 max = min + PAGE_CACHE_SIZE;
1064
1065                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1066
1067                 /* BUG_ON(!frag) but that'll happen anyway... */
1068
1069                 BUG_ON(frag->ofs != start);
1070
1071                 /* First grow down... */
1072                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1073
1074                         /* If the previous frag doesn't even reach the beginning, there's
1075                            excessive fragmentation. Just merge. */
1076                         if (frag->ofs > min) {
1077                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1078                                           frag->ofs, frag->ofs+frag->size));
1079                                 start = frag->ofs;
1080                                 continue;
1081                         }
1082                         /* OK. This frag holds the first byte of the page. */
1083                         if (!frag->node || !frag->node->raw) {
1084                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1085                                           frag->ofs, frag->ofs+frag->size));
1086                                 break;
1087                         } else {
1088
1089                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1090                                    in a block which is still considered clean? If so, don't obsolete it.
1091                                    If not, cover it anyway. */
1092
1093                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1094                                 struct jffs2_eraseblock *jeb;
1095
1096                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1097
1098                                 if (jeb == c->gcblock) {
1099                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1100                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1101                                         start = frag->ofs;
1102                                         break;
1103                                 }
1104                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1105                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1106                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1107                                         break;
1108                                 }
1109
1110                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1111                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1112                                 start = frag->ofs;
1113                                 break;
1114                         }
1115                 }
1116
1117                 /* ... then up */
1118
1119                 /* Find last frag which is actually part of the node we're to GC. */
1120                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1121
1122                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1123
1124                         /* If the previous frag doesn't even reach the beginning, there's lots
1125                            of fragmentation. Just merge. */
1126                         if (frag->ofs+frag->size < max) {
1127                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1128                                           frag->ofs, frag->ofs+frag->size));
1129                                 end = frag->ofs + frag->size;
1130                                 continue;
1131                         }
1132
1133                         if (!frag->node || !frag->node->raw) {
1134                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1135                                           frag->ofs, frag->ofs+frag->size));
1136                                 break;
1137                         } else {
1138
1139                                 /* OK, it's a frag which extends to the beginning of the page. Does it live 
1140                                    in a block which is still considered clean? If so, don't obsolete it.
1141                                    If not, cover it anyway. */
1142
1143                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1144                                 struct jffs2_eraseblock *jeb;
1145
1146                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1147
1148                                 if (jeb == c->gcblock) {
1149                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1150                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1151                                         end = frag->ofs + frag->size;
1152                                         break;
1153                                 }
1154                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1155                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1156                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1157                                         break;
1158                                 }
1159
1160                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1161                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1162                                 end = frag->ofs + frag->size;
1163                                 break;
1164                         }
1165                 }
1166                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n", 
1167                           orig_start, orig_end, start, end));
1168
1169                 BUG_ON(end > JFFS2_F_I_SIZE(f));
1170                 BUG_ON(end < orig_end);
1171                 BUG_ON(start > orig_start);
1172         }
1173         
1174         /* First, use readpage() to read the appropriate page into the page cache */
1175         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1176          *    triggered garbage collection in the first place?
1177          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1178          *    page OK. We'll actually write it out again in commit_write, which is a little
1179          *    suboptimal, but at least we're correct.
1180          */
1181         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1182
1183         if (IS_ERR(pg_ptr)) {
1184                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1185                 return PTR_ERR(pg_ptr);
1186         }
1187
1188         offset = start;
1189         while(offset < orig_end) {
1190                 uint32_t datalen;
1191                 uint32_t cdatalen;
1192                 uint16_t comprtype = JFFS2_COMPR_NONE;
1193
1194                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs, &alloclen);
1195
1196                 if (ret) {
1197                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1198                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1199                         break;
1200                 }
1201                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1202                 datalen = end - offset;
1203
1204                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1205
1206                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1207
1208                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1209                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1210                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1211                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1212
1213                 ri.ino = cpu_to_je32(f->inocache->ino);
1214                 ri.version = cpu_to_je32(++f->highest_version);
1215                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1216                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1217                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1218                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1219                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1220                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1221                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1222                 ri.offset = cpu_to_je32(offset);
1223                 ri.csize = cpu_to_je32(cdatalen);
1224                 ri.dsize = cpu_to_je32(datalen);
1225                 ri.compr = comprtype & 0xff;
1226                 ri.usercompr = (comprtype >> 8) & 0xff;
1227                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1228                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1229         
1230                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1231
1232                 jffs2_free_comprbuf(comprbuf, writebuf);
1233
1234                 if (IS_ERR(new_fn)) {
1235                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1236                         ret = PTR_ERR(new_fn);
1237                         break;
1238                 }
1239                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1240                 offset += datalen;
1241                 if (f->metadata) {
1242                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1243                         jffs2_free_full_dnode(f->metadata);
1244                         f->metadata = NULL;
1245                 }
1246         }
1247
1248         jffs2_gc_release_page(c, pg_ptr, &pg);
1249         return ret;
1250 }
1251