/*
 * [linux-2.6] fs/jffs2/build.c
 * (extraction residue from an unrelated commit followed this header:
 *  "hugetlb: enforce quotas during reservation for shared mappings")
 */
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/sched.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/mtd/mtd.h>
17 #include "nodelist.h"
18
19 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
20                 struct jffs2_inode_cache *, struct jffs2_full_dirent **);
21
22 static inline struct jffs2_inode_cache *
23 first_inode_chain(int *i, struct jffs2_sb_info *c)
24 {
25         for (; *i < INOCACHE_HASHSIZE; (*i)++) {
26                 if (c->inocache_list[*i])
27                         return c->inocache_list[*i];
28         }
29         return NULL;
30 }
31
32 static inline struct jffs2_inode_cache *
33 next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
34 {
35         /* More in this chain? */
36         if (ic->next)
37                 return ic->next;
38         (*i)++;
39         return first_inode_chain(i, c);
40 }
41
/* Iterate over every jffs2_inode_cache in the hash table.  'i' is the
 * bucket index and 'ic' the cursor; both are written by the loop, so
 * pass plain lvalues. */
#define for_each_inode(i, c, ic)                        \
        for (i = 0, ic = first_inode_chain(&i, (c));    \
             ic;                                        \
             ic = next_inode(&i, ic, (c)))
46
47
48 static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
49                                         struct jffs2_inode_cache *ic)
50 {
51         struct jffs2_full_dirent *fd;
52
53         dbg_fsbuild("building directory inode #%u\n", ic->ino);
54
55         /* For each child, increase nlink */
56         for(fd = ic->scan_dents; fd; fd = fd->next) {
57                 struct jffs2_inode_cache *child_ic;
58                 if (!fd->ino)
59                         continue;
60
61                 /* we can get high latency here with huge directories */
62
63                 child_ic = jffs2_get_ino_cache(c, fd->ino);
64                 if (!child_ic) {
65                         dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
66                                   fd->name, fd->ino, ic->ino);
67                         jffs2_mark_node_obsolete(c, fd->raw);
68                         continue;
69                 }
70
71                 if (child_ic->nlink++ && fd->type == DT_DIR) {
72                         JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
73                                 fd->name, fd->ino, ic->ino);
74                         /* TODO: What do we do about it? */
75                 }
76                 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
77                 /* Can't free scan_dents so far. We might need them in pass 2 */
78         }
79 }
80
81 /* Scan plan:
82  - Scan physical nodes. Build map of inodes/dirents. Allocate inocaches as we go
83  - Scan directory tree from top down, setting nlink in inocaches
84  - Scan inocaches for inodes with nlink==0
85 */
86 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
87 {
88         int ret;
89         int i;
90         struct jffs2_inode_cache *ic;
91         struct jffs2_full_dirent *fd;
92         struct jffs2_full_dirent *dead_fds = NULL;
93
94         dbg_fsbuild("build FS data structures\n");
95
96         /* First, scan the medium and build all the inode caches with
97            lists of physical nodes */
98
99         c->flags |= JFFS2_SB_FLAG_SCANNING;
100         ret = jffs2_scan_medium(c);
101         c->flags &= ~JFFS2_SB_FLAG_SCANNING;
102         if (ret)
103                 goto exit;
104
105         dbg_fsbuild("scanned flash completely\n");
106         jffs2_dbg_dump_block_lists_nolock(c);
107
108         dbg_fsbuild("pass 1 starting\n");
109         c->flags |= JFFS2_SB_FLAG_BUILDING;
110         /* Now scan the directory tree, increasing nlink according to every dirent found. */
111         for_each_inode(i, c, ic) {
112                 if (ic->scan_dents) {
113                         jffs2_build_inode_pass1(c, ic);
114                         cond_resched();
115                 }
116         }
117
118         dbg_fsbuild("pass 1 complete\n");
119
120         /* Next, scan for inodes with nlink == 0 and remove them. If
121            they were directories, then decrement the nlink of their
122            children too, and repeat the scan. As that's going to be
123            a fairly uncommon occurrence, it's not so evil to do it this
124            way. Recursion bad. */
125         dbg_fsbuild("pass 2 starting\n");
126
127         for_each_inode(i, c, ic) {
128                 if (ic->nlink)
129                         continue;
130
131                 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
132                 cond_resched();
133         }
134
135         dbg_fsbuild("pass 2a starting\n");
136
137         while (dead_fds) {
138                 fd = dead_fds;
139                 dead_fds = fd->next;
140
141                 ic = jffs2_get_ino_cache(c, fd->ino);
142
143                 if (ic)
144                         jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
145                 jffs2_free_full_dirent(fd);
146         }
147
148         dbg_fsbuild("pass 2a complete\n");
149         dbg_fsbuild("freeing temporary data structures\n");
150
151         /* Finally, we can scan again and free the dirent structs */
152         for_each_inode(i, c, ic) {
153                 while(ic->scan_dents) {
154                         fd = ic->scan_dents;
155                         ic->scan_dents = fd->next;
156                         jffs2_free_full_dirent(fd);
157                 }
158                 ic->scan_dents = NULL;
159                 cond_resched();
160         }
161         jffs2_build_xattr_subsystem(c);
162         c->flags &= ~JFFS2_SB_FLAG_BUILDING;
163
164         dbg_fsbuild("FS build complete\n");
165
166         /* Rotate the lists by some number to ensure wear levelling */
167         jffs2_rotate_lists(c);
168
169         ret = 0;
170
171 exit:
172         if (ret) {
173                 for_each_inode(i, c, ic) {
174                         while(ic->scan_dents) {
175                                 fd = ic->scan_dents;
176                                 ic->scan_dents = fd->next;
177                                 jffs2_free_full_dirent(fd);
178                         }
179                 }
180                 jffs2_clear_xattr_subsystem(c);
181         }
182
183         return ret;
184 }
185
186 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
187                                         struct jffs2_inode_cache *ic,
188                                         struct jffs2_full_dirent **dead_fds)
189 {
190         struct jffs2_raw_node_ref *raw;
191         struct jffs2_full_dirent *fd;
192
193         dbg_fsbuild("removing ino #%u with nlink == zero.\n", ic->ino);
194
195         raw = ic->nodes;
196         while (raw != (void *)ic) {
197                 struct jffs2_raw_node_ref *next = raw->next_in_ino;
198                 dbg_fsbuild("obsoleting node at 0x%08x\n", ref_offset(raw));
199                 jffs2_mark_node_obsolete(c, raw);
200                 raw = next;
201         }
202
203         if (ic->scan_dents) {
204                 int whinged = 0;
205                 dbg_fsbuild("inode #%u was a directory which may have children...\n", ic->ino);
206
207                 while(ic->scan_dents) {
208                         struct jffs2_inode_cache *child_ic;
209
210                         fd = ic->scan_dents;
211                         ic->scan_dents = fd->next;
212
213                         if (!fd->ino) {
214                                 /* It's a deletion dirent. Ignore it */
215                                 dbg_fsbuild("child \"%s\" is a deletion dirent, skipping...\n", fd->name);
216                                 jffs2_free_full_dirent(fd);
217                                 continue;
218                         }
219                         if (!whinged)
220                                 whinged = 1;
221
222                         dbg_fsbuild("removing child \"%s\", ino #%u\n", fd->name, fd->ino);
223
224                         child_ic = jffs2_get_ino_cache(c, fd->ino);
225                         if (!child_ic) {
226                                 dbg_fsbuild("cannot remove child \"%s\", ino #%u, because it doesn't exist\n",
227                                                 fd->name, fd->ino);
228                                 jffs2_free_full_dirent(fd);
229                                 continue;
230                         }
231
232                         /* Reduce nlink of the child. If it's now zero, stick it on the
233                            dead_fds list to be cleaned up later. Else just free the fd */
234
235                         child_ic->nlink--;
236
237                         if (!child_ic->nlink) {
238                                 dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n",
239                                           fd->ino, fd->name);
240                                 fd->next = *dead_fds;
241                                 *dead_fds = fd;
242                         } else {
243                                 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
244                                           fd->ino, fd->name, child_ic->nlink);
245                                 jffs2_free_full_dirent(fd);
246                         }
247                 }
248         }
249
250         /*
251            We don't delete the inocache from the hash list and free it yet.
252            The erase code will do that, when all the nodes are completely gone.
253         */
254 }
255
256 static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
257 {
258         uint32_t size;
259
260         /* Deletion should almost _always_ be allowed. We're fairly
261            buggered once we stop allowing people to delete stuff
262            because there's not enough free space... */
263         c->resv_blocks_deletion = 2;
264
265         /* Be conservative about how much space we need before we allow writes.
266            On top of that which is required for deletia, require an extra 2%
267            of the medium to be available, for overhead caused by nodes being
268            split across blocks, etc. */
269
270         size = c->flash_size / 50; /* 2% of flash size */
271         size += c->nr_blocks * 100; /* And 100 bytes per eraseblock */
272         size += c->sector_size - 1; /* ... and round up */
273
274         c->resv_blocks_write = c->resv_blocks_deletion + (size / c->sector_size);
275
276         /* When do we let the GC thread run in the background */
277
278         c->resv_blocks_gctrigger = c->resv_blocks_write + 1;
279
280         /* When do we allow garbage collection to merge nodes to make
281            long-term progress at the expense of short-term space exhaustion? */
282         c->resv_blocks_gcmerge = c->resv_blocks_deletion + 1;
283
284         /* When do we allow garbage collection to eat from bad blocks rather
285            than actually making progress? */
286         c->resv_blocks_gcbad = 0;//c->resv_blocks_deletion + 2;
287
288         /* What number of 'very dirty' eraseblocks do we allow before we
289            trigger the GC thread even if we don't _need_ the space. When we
290            can't mark nodes obsolete on the medium, the old dirty nodes cause
291            performance problems because we have to inspect and discard them. */
292         c->vdirty_blocks_gctrigger = c->resv_blocks_gctrigger;
293         if (jffs2_can_mark_obsolete(c))
294                 c->vdirty_blocks_gctrigger *= 10;
295
296         /* If there's less than this amount of dirty space, don't bother
297            trying to GC to make more space. It'll be a fruitless task */
298         c->nospc_dirty_size = c->sector_size + (c->flash_size / 100);
299
300         dbg_fsbuild("JFFS2 trigger levels (size %d KiB, block size %d KiB, %d blocks)\n",
301                   c->flash_size / 1024, c->sector_size / 1024, c->nr_blocks);
302         dbg_fsbuild("Blocks required to allow deletion:    %d (%d KiB)\n",
303                   c->resv_blocks_deletion, c->resv_blocks_deletion*c->sector_size/1024);
304         dbg_fsbuild("Blocks required to allow writes:      %d (%d KiB)\n",
305                   c->resv_blocks_write, c->resv_blocks_write*c->sector_size/1024);
306         dbg_fsbuild("Blocks required to quiesce GC thread: %d (%d KiB)\n",
307                   c->resv_blocks_gctrigger, c->resv_blocks_gctrigger*c->sector_size/1024);
308         dbg_fsbuild("Blocks required to allow GC merges:   %d (%d KiB)\n",
309                   c->resv_blocks_gcmerge, c->resv_blocks_gcmerge*c->sector_size/1024);
310         dbg_fsbuild("Blocks required to GC bad blocks:     %d (%d KiB)\n",
311                   c->resv_blocks_gcbad, c->resv_blocks_gcbad*c->sector_size/1024);
312         dbg_fsbuild("Amount of dirty space required to GC: %d bytes\n",
313                   c->nospc_dirty_size);
314         dbg_fsbuild("Very dirty blocks before GC triggered: %d\n",
315                   c->vdirty_blocks_gctrigger);
316 }
317
318 int jffs2_do_mount_fs(struct jffs2_sb_info *c)
319 {
320         int ret;
321         int i;
322         int size;
323
324         c->free_size = c->flash_size;
325         c->nr_blocks = c->flash_size / c->sector_size;
326         size = sizeof(struct jffs2_eraseblock) * c->nr_blocks;
327 #ifndef __ECOS
328         if (jffs2_blocks_use_vmalloc(c))
329                 c->blocks = vmalloc(size);
330         else
331 #endif
332                 c->blocks = kmalloc(size, GFP_KERNEL);
333         if (!c->blocks)
334                 return -ENOMEM;
335
336         memset(c->blocks, 0, size);
337         for (i=0; i<c->nr_blocks; i++) {
338                 INIT_LIST_HEAD(&c->blocks[i].list);
339                 c->blocks[i].offset = i * c->sector_size;
340                 c->blocks[i].free_size = c->sector_size;
341         }
342
343         INIT_LIST_HEAD(&c->clean_list);
344         INIT_LIST_HEAD(&c->very_dirty_list);
345         INIT_LIST_HEAD(&c->dirty_list);
346         INIT_LIST_HEAD(&c->erasable_list);
347         INIT_LIST_HEAD(&c->erasing_list);
348         INIT_LIST_HEAD(&c->erase_pending_list);
349         INIT_LIST_HEAD(&c->erasable_pending_wbuf_list);
350         INIT_LIST_HEAD(&c->erase_complete_list);
351         INIT_LIST_HEAD(&c->free_list);
352         INIT_LIST_HEAD(&c->bad_list);
353         INIT_LIST_HEAD(&c->bad_used_list);
354         c->highest_ino = 1;
355         c->summary = NULL;
356
357         ret = jffs2_sum_init(c);
358         if (ret)
359                 goto out_free;
360
361         if (jffs2_build_filesystem(c)) {
362                 dbg_fsbuild("build_fs failed\n");
363                 jffs2_free_ino_caches(c);
364                 jffs2_free_raw_node_refs(c);
365                 ret = -EIO;
366                 goto out_free;
367         }
368
369         jffs2_calc_trigger_levels(c);
370
371         return 0;
372
373  out_free:
374 #ifndef __ECOS
375         if (jffs2_blocks_use_vmalloc(c))
376                 vfree(c->blocks);
377         else
378 #endif
379                 kfree(c->blocks);
380
381         return ret;
382 }