ext4: Move fs/ext4/group.h into ext4.h
[linux-2.6] / fs / ext4 / super.c
1 /*
2  *  linux/fs/ext4/super.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/inode.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  */
18
19 #include <linux/module.h>
20 #include <linux/string.h>
21 #include <linux/fs.h>
22 #include <linux/time.h>
23 #include <linux/vmalloc.h>
24 #include <linux/jbd2.h>
25 #include <linux/slab.h>
26 #include <linux/init.h>
27 #include <linux/blkdev.h>
28 #include <linux/parser.h>
29 #include <linux/smp_lock.h>
30 #include <linux/buffer_head.h>
31 #include <linux/exportfs.h>
32 #include <linux/vfs.h>
33 #include <linux/random.h>
34 #include <linux/mount.h>
35 #include <linux/namei.h>
36 #include <linux/quotaops.h>
37 #include <linux/seq_file.h>
38 #include <linux/proc_fs.h>
39 #include <linux/ctype.h>
40 #include <linux/marker.h>
41 #include <linux/log2.h>
42 #include <linux/crc16.h>
43 #include <asm/uaccess.h>
44
45 #include "ext4.h"
46 #include "ext4_jbd2.h"
47 #include "xattr.h"
48 #include "acl.h"
49
50 struct proc_dir_entry *ext4_proc_root;
51 static struct kset *ext4_kset;
52
53 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
54                              unsigned long journal_devnum);
55 static int ext4_commit_super(struct super_block *sb, int sync);
56 static void ext4_mark_recovery_complete(struct super_block *sb,
57                                         struct ext4_super_block *es);
58 static void ext4_clear_journal_err(struct super_block *sb,
59                                    struct ext4_super_block *es);
60 static int ext4_sync_fs(struct super_block *sb, int wait);
61 static const char *ext4_decode_error(struct super_block *sb, int errno,
62                                      char nbuf[16]);
63 static int ext4_remount(struct super_block *sb, int *flags, char *data);
64 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
65 static int ext4_unfreeze(struct super_block *sb);
66 static void ext4_write_super(struct super_block *sb);
67 static int ext4_freeze(struct super_block *sb);
68
69
70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
71                                struct ext4_group_desc *bg)
72 {
73         return le32_to_cpu(bg->bg_block_bitmap_lo) |
74                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
75                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
76 }
77
78 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
79                                struct ext4_group_desc *bg)
80 {
81         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
82                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
83                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
84 }
85
86 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
87                               struct ext4_group_desc *bg)
88 {
89         return le32_to_cpu(bg->bg_inode_table_lo) |
90                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
91                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
92 }
93
94 __u32 ext4_free_blks_count(struct super_block *sb,
95                               struct ext4_group_desc *bg)
96 {
97         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
98                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
99                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
100 }
101
102 __u32 ext4_free_inodes_count(struct super_block *sb,
103                               struct ext4_group_desc *bg)
104 {
105         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
106                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
107                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
108 }
109
110 __u32 ext4_used_dirs_count(struct super_block *sb,
111                               struct ext4_group_desc *bg)
112 {
113         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
114                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
115                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
116 }
117
118 __u32 ext4_itable_unused_count(struct super_block *sb,
119                               struct ext4_group_desc *bg)
120 {
121         return le16_to_cpu(bg->bg_itable_unused_lo) |
122                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
123                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
124 }
125
126 void ext4_block_bitmap_set(struct super_block *sb,
127                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
128 {
129         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
130         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
131                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
132 }
133
134 void ext4_inode_bitmap_set(struct super_block *sb,
135                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
136 {
137         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
138         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
139                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
140 }
141
142 void ext4_inode_table_set(struct super_block *sb,
143                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
144 {
145         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
146         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
147                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
148 }
149
150 void ext4_free_blks_set(struct super_block *sb,
151                           struct ext4_group_desc *bg, __u32 count)
152 {
153         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
154         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
155                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
156 }
157
158 void ext4_free_inodes_set(struct super_block *sb,
159                           struct ext4_group_desc *bg, __u32 count)
160 {
161         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
162         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
163                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
164 }
165
166 void ext4_used_dirs_set(struct super_block *sb,
167                           struct ext4_group_desc *bg, __u32 count)
168 {
169         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
170         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
171                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
172 }
173
174 void ext4_itable_unused_set(struct super_block *sb,
175                           struct ext4_group_desc *bg, __u32 count)
176 {
177         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
178         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
179                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
180 }
181
182 /*
183  * Wrappers for jbd2_journal_start/end.
184  *
185  * The only special thing we need to do here is to make sure that all
186  * journal_end calls result in the superblock being marked dirty, so
187  * that sync() will call the filesystem's write_super callback if
188  * appropriate.
189  */
190 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
191 {
192         journal_t *journal;
193
194         if (sb->s_flags & MS_RDONLY)
195                 return ERR_PTR(-EROFS);
196
197         /* Special case here: if the journal has aborted behind our
198          * backs (eg. EIO in the commit thread), then we still need to
199          * take the FS itself readonly cleanly. */
200         journal = EXT4_SB(sb)->s_journal;
201         if (journal) {
202                 if (is_journal_aborted(journal)) {
203                         ext4_abort(sb, __func__,
204                                    "Detected aborted journal");
205                         return ERR_PTR(-EROFS);
206                 }
207                 return jbd2_journal_start(journal, nblocks);
208         }
209         /*
210          * We're not journaling, return the appropriate indication.
211          */
212         current->journal_info = EXT4_NOJOURNAL_HANDLE;
213         return current->journal_info;
214 }
215
216 /*
217  * The only special thing we need to do here is to make sure that all
218  * jbd2_journal_stop calls result in the superblock being marked dirty, so
219  * that sync() will call the filesystem's write_super callback if
220  * appropriate.
221  */
222 int __ext4_journal_stop(const char *where, handle_t *handle)
223 {
224         struct super_block *sb;
225         int err;
226         int rc;
227
228         if (!ext4_handle_valid(handle)) {
229                 /*
230                  * Do this here since we don't call jbd2_journal_stop() in
231                  * no-journal mode.
232                  */
233                 current->journal_info = NULL;
234                 return 0;
235         }
236         sb = handle->h_transaction->t_journal->j_private;
237         err = handle->h_err;
238         rc = jbd2_journal_stop(handle);
239
240         if (!err)
241                 err = rc;
242         if (err)
243                 __ext4_std_error(sb, where, err);
244         return err;
245 }
246
247 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
248                 struct buffer_head *bh, handle_t *handle, int err)
249 {
250         char nbuf[16];
251         const char *errstr = ext4_decode_error(NULL, err, nbuf);
252
253         BUG_ON(!ext4_handle_valid(handle));
254
255         if (bh)
256                 BUFFER_TRACE(bh, "abort");
257
258         if (!handle->h_err)
259                 handle->h_err = err;
260
261         if (is_handle_aborted(handle))
262                 return;
263
264         printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
265                caller, errstr, err_fn);
266
267         jbd2_journal_abort_handle(handle);
268 }
269
270 /* Deal with the reporting of failure conditions on a filesystem such as
271  * inconsistencies detected or read IO failures.
272  *
273  * On ext2, we can store the error state of the filesystem in the
274  * superblock.  That is not possible on ext4, because we may have other
275  * write ordering constraints on the superblock which prevent us from
276  * writing it out straight away; and given that the journal is about to
277  * be aborted, we can't rely on the current, or future, transactions to
278  * write out the superblock safely.
279  *
280  * We'll just use the jbd2_journal_abort() error code to record an error in
281  * the journal instead.  On recovery, the journal will complain about
282  * that error until we've noted it down and cleared it.
283  */
284
285 static void ext4_handle_error(struct super_block *sb)
286 {
287         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
288
289         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
290         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
291
292         if (sb->s_flags & MS_RDONLY)
293                 return;
294
295         if (!test_opt(sb, ERRORS_CONT)) {
296                 journal_t *journal = EXT4_SB(sb)->s_journal;
297
298                 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
299                 if (journal)
300                         jbd2_journal_abort(journal, -EIO);
301         }
302         if (test_opt(sb, ERRORS_RO)) {
303                 printk(KERN_CRIT "Remounting filesystem read-only\n");
304                 sb->s_flags |= MS_RDONLY;
305         }
306         ext4_commit_super(sb, 1);
307         if (test_opt(sb, ERRORS_PANIC))
308                 panic("EXT4-fs (device %s): panic forced after error\n",
309                         sb->s_id);
310 }
311
312 void ext4_error(struct super_block *sb, const char *function,
313                 const char *fmt, ...)
314 {
315         va_list args;
316
317         va_start(args, fmt);
318         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
319         vprintk(fmt, args);
320         printk("\n");
321         va_end(args);
322
323         ext4_handle_error(sb);
324 }
325
326 static const char *ext4_decode_error(struct super_block *sb, int errno,
327                                      char nbuf[16])
328 {
329         char *errstr = NULL;
330
331         switch (errno) {
332         case -EIO:
333                 errstr = "IO failure";
334                 break;
335         case -ENOMEM:
336                 errstr = "Out of memory";
337                 break;
338         case -EROFS:
339                 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
340                         errstr = "Journal has aborted";
341                 else
342                         errstr = "Readonly filesystem";
343                 break;
344         default:
345                 /* If the caller passed in an extra buffer for unknown
346                  * errors, textualise them now.  Else we just return
347                  * NULL. */
348                 if (nbuf) {
349                         /* Check for truncated error codes... */
350                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
351                                 errstr = nbuf;
352                 }
353                 break;
354         }
355
356         return errstr;
357 }
358
359 /* __ext4_std_error decodes expected errors from journaling functions
360  * automatically and invokes the appropriate error response.  */
361
362 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
363 {
364         char nbuf[16];
365         const char *errstr;
366
367         /* Special case: if the error is EROFS, and we're not already
368          * inside a transaction, then there's really no point in logging
369          * an error. */
370         if (errno == -EROFS && journal_current_handle() == NULL &&
371             (sb->s_flags & MS_RDONLY))
372                 return;
373
374         errstr = ext4_decode_error(sb, errno, nbuf);
375         printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
376                sb->s_id, function, errstr);
377
378         ext4_handle_error(sb);
379 }
380
381 /*
382  * ext4_abort is a much stronger failure handler than ext4_error.  The
383  * abort function may be used to deal with unrecoverable failures such
384  * as journal IO errors or ENOMEM at a critical moment in log management.
385  *
386  * We unconditionally force the filesystem into an ABORT|READONLY state,
387  * unless the error response on the fs has been set to panic in which
388  * case we take the easy way out and panic immediately.
389  */
390
391 void ext4_abort(struct super_block *sb, const char *function,
392                 const char *fmt, ...)
393 {
394         va_list args;
395
396         printk(KERN_CRIT "ext4_abort called.\n");
397
398         va_start(args, fmt);
399         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
400         vprintk(fmt, args);
401         printk("\n");
402         va_end(args);
403
404         if (test_opt(sb, ERRORS_PANIC))
405                 panic("EXT4-fs panic from previous error\n");
406
407         if (sb->s_flags & MS_RDONLY)
408                 return;
409
410         printk(KERN_CRIT "Remounting filesystem read-only\n");
411         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
412         sb->s_flags |= MS_RDONLY;
413         EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
414         if (EXT4_SB(sb)->s_journal)
415                 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
416 }
417
418 void ext4_warning(struct super_block *sb, const char *function,
419                   const char *fmt, ...)
420 {
421         va_list args;
422
423         va_start(args, fmt);
424         printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
425                sb->s_id, function);
426         vprintk(fmt, args);
427         printk("\n");
428         va_end(args);
429 }
430
431 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
432                                 const char *function, const char *fmt, ...)
433 __releases(bitlock)
434 __acquires(bitlock)
435 {
436         va_list args;
437         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
438
439         va_start(args, fmt);
440         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
441         vprintk(fmt, args);
442         printk("\n");
443         va_end(args);
444
445         if (test_opt(sb, ERRORS_CONT)) {
446                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
447                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
448                 ext4_commit_super(sb, 0);
449                 return;
450         }
451         ext4_unlock_group(sb, grp);
452         ext4_handle_error(sb);
453         /*
454          * We only get here in the ERRORS_RO case; relocking the group
455          * may be dangerous, but nothing bad will happen since the
456          * filesystem will have already been marked read/only and the
457          * journal has been aborted.  We return 1 as a hint to callers
458          * who might what to use the return value from
459          * ext4_grp_locked_error() to distinguish beween the
460          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
461          * aggressively from the ext4 function in question, with a
462          * more appropriate error code.
463          */
464         ext4_lock_group(sb, grp);
465         return;
466 }
467
468
469 void ext4_update_dynamic_rev(struct super_block *sb)
470 {
471         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
472
473         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
474                 return;
475
476         ext4_warning(sb, __func__,
477                      "updating to rev %d because of new feature flag, "
478                      "running e2fsck is recommended",
479                      EXT4_DYNAMIC_REV);
480
481         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
482         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
483         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
484         /* leave es->s_feature_*compat flags alone */
485         /* es->s_uuid will be set by e2fsck if empty */
486
487         /*
488          * The rest of the superblock fields should be zero, and if not it
489          * means they are likely already in use, so leave them alone.  We
490          * can leave it up to e2fsck to clean up any inconsistencies there.
491          */
492 }
493
494 /*
495  * Open the external journal device
496  */
497 static struct block_device *ext4_blkdev_get(dev_t dev)
498 {
499         struct block_device *bdev;
500         char b[BDEVNAME_SIZE];
501
502         bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
503         if (IS_ERR(bdev))
504                 goto fail;
505         return bdev;
506
507 fail:
508         printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
509                         __bdevname(dev, b), PTR_ERR(bdev));
510         return NULL;
511 }
512
513 /*
514  * Release the journal device
515  */
516 static int ext4_blkdev_put(struct block_device *bdev)
517 {
518         bd_release(bdev);
519         return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
520 }
521
522 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
523 {
524         struct block_device *bdev;
525         int ret = -ENODEV;
526
527         bdev = sbi->journal_bdev;
528         if (bdev) {
529                 ret = ext4_blkdev_put(bdev);
530                 sbi->journal_bdev = NULL;
531         }
532         return ret;
533 }
534
535 static inline struct inode *orphan_list_entry(struct list_head *l)
536 {
537         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
538 }
539
540 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
541 {
542         struct list_head *l;
543
544         printk(KERN_ERR "sb orphan head is %d\n",
545                le32_to_cpu(sbi->s_es->s_last_orphan));
546
547         printk(KERN_ERR "sb_info orphan list:\n");
548         list_for_each(l, &sbi->s_orphan) {
549                 struct inode *inode = orphan_list_entry(l);
550                 printk(KERN_ERR "  "
551                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
552                        inode->i_sb->s_id, inode->i_ino, inode,
553                        inode->i_mode, inode->i_nlink,
554                        NEXT_ORPHAN(inode));
555         }
556 }
557
558 static void ext4_put_super(struct super_block *sb)
559 {
560         struct ext4_sb_info *sbi = EXT4_SB(sb);
561         struct ext4_super_block *es = sbi->s_es;
562         int i, err;
563
564         ext4_mb_release(sb);
565         ext4_ext_release(sb);
566         ext4_xattr_put_super(sb);
567         if (sbi->s_journal) {
568                 err = jbd2_journal_destroy(sbi->s_journal);
569                 sbi->s_journal = NULL;
570                 if (err < 0)
571                         ext4_abort(sb, __func__,
572                                    "Couldn't clean up the journal");
573         }
574         if (!(sb->s_flags & MS_RDONLY)) {
575                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
576                 es->s_state = cpu_to_le16(sbi->s_mount_state);
577                 ext4_commit_super(sb, 1);
578         }
579         if (sbi->s_proc) {
580                 remove_proc_entry(sb->s_id, ext4_proc_root);
581         }
582         kobject_del(&sbi->s_kobj);
583
584         for (i = 0; i < sbi->s_gdb_count; i++)
585                 brelse(sbi->s_group_desc[i]);
586         kfree(sbi->s_group_desc);
587         if (is_vmalloc_addr(sbi->s_flex_groups))
588                 vfree(sbi->s_flex_groups);
589         else
590                 kfree(sbi->s_flex_groups);
591         percpu_counter_destroy(&sbi->s_freeblocks_counter);
592         percpu_counter_destroy(&sbi->s_freeinodes_counter);
593         percpu_counter_destroy(&sbi->s_dirs_counter);
594         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
595         brelse(sbi->s_sbh);
596 #ifdef CONFIG_QUOTA
597         for (i = 0; i < MAXQUOTAS; i++)
598                 kfree(sbi->s_qf_names[i]);
599 #endif
600
601         /* Debugging code just in case the in-memory inode orphan list
602          * isn't empty.  The on-disk one can be non-empty if we've
603          * detected an error and taken the fs readonly, but the
604          * in-memory list had better be clean by this point. */
605         if (!list_empty(&sbi->s_orphan))
606                 dump_orphan_list(sb, sbi);
607         J_ASSERT(list_empty(&sbi->s_orphan));
608
609         invalidate_bdev(sb->s_bdev);
610         if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
611                 /*
612                  * Invalidate the journal device's buffers.  We don't want them
613                  * floating about in memory - the physical journal device may
614                  * hotswapped, and it breaks the `ro-after' testing code.
615                  */
616                 sync_blockdev(sbi->journal_bdev);
617                 invalidate_bdev(sbi->journal_bdev);
618                 ext4_blkdev_remove(sbi);
619         }
620         sb->s_fs_info = NULL;
621         /*
622          * Now that we are completely done shutting down the
623          * superblock, we need to actually destroy the kobject.
624          */
625         unlock_kernel();
626         unlock_super(sb);
627         kobject_put(&sbi->s_kobj);
628         wait_for_completion(&sbi->s_kobj_unregister);
629         lock_super(sb);
630         lock_kernel();
631         kfree(sbi->s_blockgroup_lock);
632         kfree(sbi);
633         return;
634 }
635
636 static struct kmem_cache *ext4_inode_cachep;
637
638 /*
639  * Called inside transaction, so use GFP_NOFS
640  */
641 static struct inode *ext4_alloc_inode(struct super_block *sb)
642 {
643         struct ext4_inode_info *ei;
644
645         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
646         if (!ei)
647                 return NULL;
648 #ifdef CONFIG_EXT4_FS_POSIX_ACL
649         ei->i_acl = EXT4_ACL_NOT_CACHED;
650         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
651 #endif
652         ei->vfs_inode.i_version = 1;
653         ei->vfs_inode.i_data.writeback_index = 0;
654         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
655         INIT_LIST_HEAD(&ei->i_prealloc_list);
656         spin_lock_init(&ei->i_prealloc_lock);
657         /*
658          * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
659          * therefore it can be null here.  Don't check it, just initialize
660          * jinode.
661          */
662         jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
663         ei->i_reserved_data_blocks = 0;
664         ei->i_reserved_meta_blocks = 0;
665         ei->i_allocated_meta_blocks = 0;
666         ei->i_delalloc_reserved_flag = 0;
667         spin_lock_init(&(ei->i_block_reservation_lock));
668         return &ei->vfs_inode;
669 }
670
671 static void ext4_destroy_inode(struct inode *inode)
672 {
673         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
674                 printk("EXT4 Inode %p: orphan list check failed!\n",
675                         EXT4_I(inode));
676                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
677                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
678                                 true);
679                 dump_stack();
680         }
681         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
682 }
683
684 static void init_once(void *foo)
685 {
686         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
687
688         INIT_LIST_HEAD(&ei->i_orphan);
689 #ifdef CONFIG_EXT4_FS_XATTR
690         init_rwsem(&ei->xattr_sem);
691 #endif
692         init_rwsem(&ei->i_data_sem);
693         inode_init_once(&ei->vfs_inode);
694 }
695
696 static int init_inodecache(void)
697 {
698         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
699                                              sizeof(struct ext4_inode_info),
700                                              0, (SLAB_RECLAIM_ACCOUNT|
701                                                 SLAB_MEM_SPREAD),
702                                              init_once);
703         if (ext4_inode_cachep == NULL)
704                 return -ENOMEM;
705         return 0;
706 }
707
/* Destroy the slab cache created by init_inodecache(). */
static void destroy_inodecache(void)
{
        kmem_cache_destroy(ext4_inode_cachep);
}
712
713 static void ext4_clear_inode(struct inode *inode)
714 {
715 #ifdef CONFIG_EXT4_FS_POSIX_ACL
716         if (EXT4_I(inode)->i_acl &&
717                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
718                 posix_acl_release(EXT4_I(inode)->i_acl);
719                 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
720         }
721         if (EXT4_I(inode)->i_default_acl &&
722                         EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
723                 posix_acl_release(EXT4_I(inode)->i_default_acl);
724                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
725         }
726 #endif
727         ext4_discard_preallocations(inode);
728         if (EXT4_JOURNAL(inode))
729                 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
730                                        &EXT4_I(inode)->jinode);
731 }
732
/*
 * Append the quota-related mount options of @sb to @seq.  Emits nothing
 * when CONFIG_QUOTA is not defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
                                           struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
        struct ext4_sb_info *sbi = EXT4_SB(sb);

        if (sbi->s_jquota_fmt) {
                const char *fmt_name = "vfsv0";

                if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
                        fmt_name = "vfsold";
                seq_printf(seq, ",jqfmt=%s", fmt_name);
        }

        /* Journaled quota file names, when set. */
        if (sbi->s_qf_names[USRQUOTA])
                seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
        if (sbi->s_qf_names[GRPQUOTA])
                seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

        /* Plain quota mount flags. */
        if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
                seq_puts(seq, ",usrquota");
        if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
                seq_puts(seq, ",grpquota");
#endif
}
756
757 /*
758  * Show an option if
759  *  - it's set to a non-default value OR
760  *  - if the per-sb default is different from the global default
761  */
762 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
763 {
764         int def_errors;
765         unsigned long def_mount_opts;
766         struct super_block *sb = vfs->mnt_sb;
767         struct ext4_sb_info *sbi = EXT4_SB(sb);
768         struct ext4_super_block *es = sbi->s_es;
769
770         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
771         def_errors     = le16_to_cpu(es->s_errors);
772
773         if (sbi->s_sb_block != 1)
774                 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
775         if (test_opt(sb, MINIX_DF))
776                 seq_puts(seq, ",minixdf");
777         if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
778                 seq_puts(seq, ",grpid");
779         if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
780                 seq_puts(seq, ",nogrpid");
781         if (sbi->s_resuid != EXT4_DEF_RESUID ||
782             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
783                 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
784         }
785         if (sbi->s_resgid != EXT4_DEF_RESGID ||
786             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
787                 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
788         }
789         if (test_opt(sb, ERRORS_RO)) {
790                 if (def_errors == EXT4_ERRORS_PANIC ||
791                     def_errors == EXT4_ERRORS_CONTINUE) {
792                         seq_puts(seq, ",errors=remount-ro");
793                 }
794         }
795         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
796                 seq_puts(seq, ",errors=continue");
797         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
798                 seq_puts(seq, ",errors=panic");
799         if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
800                 seq_puts(seq, ",nouid32");
801         if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
802                 seq_puts(seq, ",debug");
803         if (test_opt(sb, OLDALLOC))
804                 seq_puts(seq, ",oldalloc");
805 #ifdef CONFIG_EXT4_FS_XATTR
806         if (test_opt(sb, XATTR_USER) &&
807                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
808                 seq_puts(seq, ",user_xattr");
809         if (!test_opt(sb, XATTR_USER) &&
810             (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
811                 seq_puts(seq, ",nouser_xattr");
812         }
813 #endif
814 #ifdef CONFIG_EXT4_FS_POSIX_ACL
815         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
816                 seq_puts(seq, ",acl");
817         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
818                 seq_puts(seq, ",noacl");
819 #endif
820         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
821                 seq_printf(seq, ",commit=%u",
822                            (unsigned) (sbi->s_commit_interval / HZ));
823         }
824         if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
825                 seq_printf(seq, ",min_batch_time=%u",
826                            (unsigned) sbi->s_min_batch_time);
827         }
828         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
829                 seq_printf(seq, ",max_batch_time=%u",
830                            (unsigned) sbi->s_min_batch_time);
831         }
832
833         /*
834          * We're changing the default of barrier mount option, so
835          * let's always display its mount state so it's clear what its
836          * status is.
837          */
838         seq_puts(seq, ",barrier=");
839         seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
840         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
841                 seq_puts(seq, ",journal_async_commit");
842         if (test_opt(sb, NOBH))
843                 seq_puts(seq, ",nobh");
844         if (test_opt(sb, I_VERSION))
845                 seq_puts(seq, ",i_version");
846         if (!test_opt(sb, DELALLOC))
847                 seq_puts(seq, ",nodelalloc");
848
849
850         if (sbi->s_stripe)
851                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
852         /*
853          * journal mode get enabled in different ways
854          * So just print the value even if we didn't specify it
855          */
856         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
857                 seq_puts(seq, ",data=journal");
858         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
859                 seq_puts(seq, ",data=ordered");
860         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
861                 seq_puts(seq, ",data=writeback");
862
863         if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
864                 seq_printf(seq, ",inode_readahead_blks=%u",
865                            sbi->s_inode_readahead_blks);
866
867         if (test_opt(sb, DATA_ERR_ABORT))
868                 seq_puts(seq, ",data_err=abort");
869
870         if (test_opt(sb, NO_AUTO_DA_ALLOC))
871                 seq_puts(seq, ",noauto_da_alloc");
872
873         ext4_show_quota_options(seq, sb);
874         return 0;
875 }
876
877
878 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
879                 u64 ino, u32 generation)
880 {
881         struct inode *inode;
882
883         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
884                 return ERR_PTR(-ESTALE);
885         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
886                 return ERR_PTR(-ESTALE);
887
888         /* iget isn't really right if the inode is currently unallocated!!
889          *
890          * ext4_read_inode will return a bad_inode if the inode had been
891          * deleted, so we should be safe.
892          *
893          * Currently we don't know the generation for parent directory, so
894          * a generation of 0 means "accept any"
895          */
896         inode = ext4_iget(sb, ino);
897         if (IS_ERR(inode))
898                 return ERR_CAST(inode);
899         if (generation && inode->i_generation != generation) {
900                 iput(inode);
901                 return ERR_PTR(-ESTALE);
902         }
903
904         return inode;
905 }
906
907 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
908                 int fh_len, int fh_type)
909 {
910         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
911                                     ext4_nfs_get_inode);
912 }
913
914 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
915                 int fh_len, int fh_type)
916 {
917         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
918                                     ext4_nfs_get_inode);
919 }
920
921 /*
922  * Try to release metadata pages (indirect blocks, directories) which are
923  * mapped via the block device.  Since these pages could have journal heads
924  * which would prevent try_to_free_buffers() from freeing them, we must use
925  * jbd2 layer's try_to_free_buffers() function to release them.
926  */
927 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
928 {
929         journal_t *journal = EXT4_SB(sb)->s_journal;
930
931         WARN_ON(PageChecked(page));
932         if (!page_has_buffers(page))
933                 return 0;
934         if (journal)
935                 return jbd2_journal_try_to_free_buffers(journal, page,
936                                                         wait & ~__GFP_WAIT);
937         return try_to_free_buffers(page);
938 }
939
940 #ifdef CONFIG_QUOTA
/* Human-readable name of quota type @t, for use in messages. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")

/*
 * Pick between the identifiers <on>USRJQUOTA and <on>GRPJQUOTA according
 * to quota type @t.
 *
 * @on must be pasted bare: writing (on)##USRJQUOTA asks the preprocessor
 * to paste ")" onto an identifier, which does not form a valid token, so
 * every expansion of the macro would fail to compile.
 */
#define QTYPE2MOPT(on, t) \
        ((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))
943
944 static int ext4_write_dquot(struct dquot *dquot);
945 static int ext4_acquire_dquot(struct dquot *dquot);
946 static int ext4_release_dquot(struct dquot *dquot);
947 static int ext4_mark_dquot_dirty(struct dquot *dquot);
948 static int ext4_write_info(struct super_block *sb, int type);
949 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
950                                 char *path, int remount);
951 static int ext4_quota_on_mount(struct super_block *sb, int type);
952 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
953                                size_t len, loff_t off);
954 static ssize_t ext4_quota_write(struct super_block *sb, int type,
955                                 const char *data, size_t len, loff_t off);
956
957 static struct dquot_operations ext4_quota_operations = {
958         .initialize     = dquot_initialize,
959         .drop           = dquot_drop,
960         .alloc_space    = dquot_alloc_space,
961         .reserve_space  = dquot_reserve_space,
962         .claim_space    = dquot_claim_space,
963         .release_rsv    = dquot_release_reserved_space,
964         .get_reserved_space = ext4_get_reserved_space,
965         .alloc_inode    = dquot_alloc_inode,
966         .free_space     = dquot_free_space,
967         .free_inode     = dquot_free_inode,
968         .transfer       = dquot_transfer,
969         .write_dquot    = ext4_write_dquot,
970         .acquire_dquot  = ext4_acquire_dquot,
971         .release_dquot  = ext4_release_dquot,
972         .mark_dirty     = ext4_mark_dquot_dirty,
973         .write_info     = ext4_write_info,
974         .alloc_dquot    = dquot_alloc,
975         .destroy_dquot  = dquot_destroy,
976 };
977
978 static struct quotactl_ops ext4_qctl_operations = {
979         .quota_on       = ext4_quota_on,
980         .quota_off      = vfs_quota_off,
981         .quota_sync     = vfs_quota_sync,
982         .get_info       = vfs_get_dqinfo,
983         .set_info       = vfs_set_dqinfo,
984         .get_dqblk      = vfs_get_dqblk,
985         .set_dqblk      = vfs_set_dqblk
986 };
987 #endif
988
989 static const struct super_operations ext4_sops = {
990         .alloc_inode    = ext4_alloc_inode,
991         .destroy_inode  = ext4_destroy_inode,
992         .write_inode    = ext4_write_inode,
993         .dirty_inode    = ext4_dirty_inode,
994         .delete_inode   = ext4_delete_inode,
995         .put_super      = ext4_put_super,
996         .sync_fs        = ext4_sync_fs,
997         .freeze_fs      = ext4_freeze,
998         .unfreeze_fs    = ext4_unfreeze,
999         .statfs         = ext4_statfs,
1000         .remount_fs     = ext4_remount,
1001         .clear_inode    = ext4_clear_inode,
1002         .show_options   = ext4_show_options,
1003 #ifdef CONFIG_QUOTA
1004         .quota_read     = ext4_quota_read,
1005         .quota_write    = ext4_quota_write,
1006 #endif
1007         .bdev_try_to_free_page = bdev_try_to_free_page,
1008 };
1009
1010 static const struct super_operations ext4_nojournal_sops = {
1011         .alloc_inode    = ext4_alloc_inode,
1012         .destroy_inode  = ext4_destroy_inode,
1013         .write_inode    = ext4_write_inode,
1014         .dirty_inode    = ext4_dirty_inode,
1015         .delete_inode   = ext4_delete_inode,
1016         .write_super    = ext4_write_super,
1017         .put_super      = ext4_put_super,
1018         .statfs         = ext4_statfs,
1019         .remount_fs     = ext4_remount,
1020         .clear_inode    = ext4_clear_inode,
1021         .show_options   = ext4_show_options,
1022 #ifdef CONFIG_QUOTA
1023         .quota_read     = ext4_quota_read,
1024         .quota_write    = ext4_quota_write,
1025 #endif
1026         .bdev_try_to_free_page = bdev_try_to_free_page,
1027 };
1028
1029 static const struct export_operations ext4_export_ops = {
1030         .fh_to_dentry = ext4_fh_to_dentry,
1031         .fh_to_parent = ext4_fh_to_parent,
1032         .get_parent = ext4_get_parent,
1033 };
1034
/*
 * Mount option identifiers.  Values are assigned implicitly, so the
 * order of the enumerators below must not be changed.
 */
enum {
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,
        Opt_resgid,
        Opt_resuid,
        Opt_sb,
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,
        Opt_nouid32,
        Opt_debug,
        Opt_oldalloc,
        Opt_orlov,
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,
        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_nobh,
        Opt_bh,
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_update,
        Opt_journal_dev,
        Opt_journal_checksum,
        Opt_journal_async_commit,
        Opt_abort,
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_quota,
        Opt_noquota,
        Opt_ignore,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,
        Opt_resize,
        Opt_usrquota,
        Opt_grpquota,
        Opt_i_version,
        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,
        Opt_inode_readahead_blks,
        Opt_journal_ioprio
};
1053
1054 static const match_table_t tokens = {
1055         {Opt_bsd_df, "bsddf"},
1056         {Opt_minix_df, "minixdf"},
1057         {Opt_grpid, "grpid"},
1058         {Opt_grpid, "bsdgroups"},
1059         {Opt_nogrpid, "nogrpid"},
1060         {Opt_nogrpid, "sysvgroups"},
1061         {Opt_resgid, "resgid=%u"},
1062         {Opt_resuid, "resuid=%u"},
1063         {Opt_sb, "sb=%u"},
1064         {Opt_err_cont, "errors=continue"},
1065         {Opt_err_panic, "errors=panic"},
1066         {Opt_err_ro, "errors=remount-ro"},
1067         {Opt_nouid32, "nouid32"},
1068         {Opt_debug, "debug"},
1069         {Opt_oldalloc, "oldalloc"},
1070         {Opt_orlov, "orlov"},
1071         {Opt_user_xattr, "user_xattr"},
1072         {Opt_nouser_xattr, "nouser_xattr"},
1073         {Opt_acl, "acl"},
1074         {Opt_noacl, "noacl"},
1075         {Opt_noload, "noload"},
1076         {Opt_nobh, "nobh"},
1077         {Opt_bh, "bh"},
1078         {Opt_commit, "commit=%u"},
1079         {Opt_min_batch_time, "min_batch_time=%u"},
1080         {Opt_max_batch_time, "max_batch_time=%u"},
1081         {Opt_journal_update, "journal=update"},
1082         {Opt_journal_dev, "journal_dev=%u"},
1083         {Opt_journal_checksum, "journal_checksum"},
1084         {Opt_journal_async_commit, "journal_async_commit"},
1085         {Opt_abort, "abort"},
1086         {Opt_data_journal, "data=journal"},
1087         {Opt_data_ordered, "data=ordered"},
1088         {Opt_data_writeback, "data=writeback"},
1089         {Opt_data_err_abort, "data_err=abort"},
1090         {Opt_data_err_ignore, "data_err=ignore"},
1091         {Opt_offusrjquota, "usrjquota="},
1092         {Opt_usrjquota, "usrjquota=%s"},
1093         {Opt_offgrpjquota, "grpjquota="},
1094         {Opt_grpjquota, "grpjquota=%s"},
1095         {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1096         {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1097         {Opt_grpquota, "grpquota"},
1098         {Opt_noquota, "noquota"},
1099         {Opt_quota, "quota"},
1100         {Opt_usrquota, "usrquota"},
1101         {Opt_barrier, "barrier=%u"},
1102         {Opt_barrier, "barrier"},
1103         {Opt_nobarrier, "nobarrier"},
1104         {Opt_i_version, "i_version"},
1105         {Opt_stripe, "stripe=%u"},
1106         {Opt_resize, "resize"},
1107         {Opt_delalloc, "delalloc"},
1108         {Opt_nodelalloc, "nodelalloc"},
1109         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1110         {Opt_journal_ioprio, "journal_ioprio=%u"},
1111         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1112         {Opt_auto_da_alloc, "auto_da_alloc"},
1113         {Opt_noauto_da_alloc, "noauto_da_alloc"},
1114         {Opt_err, NULL},
1115 };
1116
1117 static ext4_fsblk_t get_sb_block(void **data)
1118 {
1119         ext4_fsblk_t    sb_block;
1120         char            *options = (char *) *data;
1121
1122         if (!options || strncmp(options, "sb=", 3) != 0)
1123                 return 1;       /* Default location */
1124         options += 3;
1125         /*todo: use simple_strtoll with >32bit ext4 */
1126         sb_block = simple_strtoul(options, &options, 0);
1127         if (*options && *options != ',') {
1128                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1129                        (char *) *data);
1130                 return 1;
1131         }
1132         if (*options == ',')
1133                 options++;
1134         *data = (void *) options;
1135         return sb_block;
1136 }
1137
/* Default journal I/O priority: class IOPRIO_CLASS_BE, priority data 3. */
#define DEFAULT_JOURNAL_IOPRIO \
        (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1139
1140 static int parse_options(char *options, struct super_block *sb,
1141                          unsigned long *journal_devnum,
1142                          unsigned int *journal_ioprio,
1143                          ext4_fsblk_t *n_blocks_count, int is_remount)
1144 {
1145         struct ext4_sb_info *sbi = EXT4_SB(sb);
1146         char *p;
1147         substring_t args[MAX_OPT_ARGS];
1148         int data_opt = 0;
1149         int option;
1150 #ifdef CONFIG_QUOTA
1151         int qtype, qfmt;
1152         char *qname;
1153 #endif
1154
1155         if (!options)
1156                 return 1;
1157
1158         while ((p = strsep(&options, ",")) != NULL) {
1159                 int token;
1160                 if (!*p)
1161                         continue;
1162
1163                 token = match_token(p, tokens, args);
1164                 switch (token) {
1165                 case Opt_bsd_df:
1166                         clear_opt(sbi->s_mount_opt, MINIX_DF);
1167                         break;
1168                 case Opt_minix_df:
1169                         set_opt(sbi->s_mount_opt, MINIX_DF);
1170                         break;
1171                 case Opt_grpid:
1172                         set_opt(sbi->s_mount_opt, GRPID);
1173                         break;
1174                 case Opt_nogrpid:
1175                         clear_opt(sbi->s_mount_opt, GRPID);
1176                         break;
1177                 case Opt_resuid:
1178                         if (match_int(&args[0], &option))
1179                                 return 0;
1180                         sbi->s_resuid = option;
1181                         break;
1182                 case Opt_resgid:
1183                         if (match_int(&args[0], &option))
1184                                 return 0;
1185                         sbi->s_resgid = option;
1186                         break;
1187                 case Opt_sb:
1188                         /* handled by get_sb_block() instead of here */
1189                         /* *sb_block = match_int(&args[0]); */
1190                         break;
1191                 case Opt_err_panic:
1192                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1193                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1194                         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1195                         break;
1196                 case Opt_err_ro:
1197                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1198                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1199                         set_opt(sbi->s_mount_opt, ERRORS_RO);
1200                         break;
1201                 case Opt_err_cont:
1202                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1203                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1204                         set_opt(sbi->s_mount_opt, ERRORS_CONT);
1205                         break;
1206                 case Opt_nouid32:
1207                         set_opt(sbi->s_mount_opt, NO_UID32);
1208                         break;
1209                 case Opt_debug:
1210                         set_opt(sbi->s_mount_opt, DEBUG);
1211                         break;
1212                 case Opt_oldalloc:
1213                         set_opt(sbi->s_mount_opt, OLDALLOC);
1214                         break;
1215                 case Opt_orlov:
1216                         clear_opt(sbi->s_mount_opt, OLDALLOC);
1217                         break;
1218 #ifdef CONFIG_EXT4_FS_XATTR
1219                 case Opt_user_xattr:
1220                         set_opt(sbi->s_mount_opt, XATTR_USER);
1221                         break;
1222                 case Opt_nouser_xattr:
1223                         clear_opt(sbi->s_mount_opt, XATTR_USER);
1224                         break;
1225 #else
1226                 case Opt_user_xattr:
1227                 case Opt_nouser_xattr:
1228                         printk(KERN_ERR "EXT4 (no)user_xattr options "
1229                                "not supported\n");
1230                         break;
1231 #endif
1232 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1233                 case Opt_acl:
1234                         set_opt(sbi->s_mount_opt, POSIX_ACL);
1235                         break;
1236                 case Opt_noacl:
1237                         clear_opt(sbi->s_mount_opt, POSIX_ACL);
1238                         break;
1239 #else
1240                 case Opt_acl:
1241                 case Opt_noacl:
1242                         printk(KERN_ERR "EXT4 (no)acl options "
1243                                "not supported\n");
1244                         break;
1245 #endif
1246                 case Opt_journal_update:
1247                         /* @@@ FIXME */
1248                         /* Eventually we will want to be able to create
1249                            a journal file here.  For now, only allow the
1250                            user to specify an existing inode to be the
1251                            journal file. */
1252                         if (is_remount) {
1253                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1254                                        "journal on remount\n");
1255                                 return 0;
1256                         }
1257                         set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1258                         break;
1259                 case Opt_journal_dev:
1260                         if (is_remount) {
1261                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1262                                        "journal on remount\n");
1263                                 return 0;
1264                         }
1265                         if (match_int(&args[0], &option))
1266                                 return 0;
1267                         *journal_devnum = option;
1268                         break;
1269                 case Opt_journal_checksum:
1270                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1271                         break;
1272                 case Opt_journal_async_commit:
1273                         set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1274                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1275                         break;
1276                 case Opt_noload:
1277                         set_opt(sbi->s_mount_opt, NOLOAD);
1278                         break;
1279                 case Opt_commit:
1280                         if (match_int(&args[0], &option))
1281                                 return 0;
1282                         if (option < 0)
1283                                 return 0;
1284                         if (option == 0)
1285                                 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1286                         sbi->s_commit_interval = HZ * option;
1287                         break;
1288                 case Opt_max_batch_time:
1289                         if (match_int(&args[0], &option))
1290                                 return 0;
1291                         if (option < 0)
1292                                 return 0;
1293                         if (option == 0)
1294                                 option = EXT4_DEF_MAX_BATCH_TIME;
1295                         sbi->s_max_batch_time = option;
1296                         break;
1297                 case Opt_min_batch_time:
1298                         if (match_int(&args[0], &option))
1299                                 return 0;
1300                         if (option < 0)
1301                                 return 0;
1302                         sbi->s_min_batch_time = option;
1303                         break;
1304                 case Opt_data_journal:
1305                         data_opt = EXT4_MOUNT_JOURNAL_DATA;
1306                         goto datacheck;
1307                 case Opt_data_ordered:
1308                         data_opt = EXT4_MOUNT_ORDERED_DATA;
1309                         goto datacheck;
1310                 case Opt_data_writeback:
1311                         data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1312                 datacheck:
1313                         if (is_remount) {
1314                                 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1315                                                 != data_opt) {
1316                                         printk(KERN_ERR
1317                                                 "EXT4-fs: cannot change data "
1318                                                 "mode on remount\n");
1319                                         return 0;
1320                                 }
1321                         } else {
1322                                 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1323                                 sbi->s_mount_opt |= data_opt;
1324                         }
1325                         break;
1326                 case Opt_data_err_abort:
1327                         set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1328                         break;
1329                 case Opt_data_err_ignore:
1330                         clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1331                         break;
1332 #ifdef CONFIG_QUOTA
1333                 case Opt_usrjquota:
1334                         qtype = USRQUOTA;
1335                         goto set_qf_name;
1336                 case Opt_grpjquota:
1337                         qtype = GRPQUOTA;
1338 set_qf_name:
1339                         if (sb_any_quota_loaded(sb) &&
1340                             !sbi->s_qf_names[qtype]) {
1341                                 printk(KERN_ERR
1342                                        "EXT4-fs: Cannot change journaled "
1343                                        "quota options when quota turned on.\n");
1344                                 return 0;
1345                         }
1346                         qname = match_strdup(&args[0]);
1347                         if (!qname) {
1348                                 printk(KERN_ERR
1349                                         "EXT4-fs: not enough memory for "
1350                                         "storing quotafile name.\n");
1351                                 return 0;
1352                         }
1353                         if (sbi->s_qf_names[qtype] &&
1354                             strcmp(sbi->s_qf_names[qtype], qname)) {
1355                                 printk(KERN_ERR
1356                                         "EXT4-fs: %s quota file already "
1357                                         "specified.\n", QTYPE2NAME(qtype));
1358                                 kfree(qname);
1359                                 return 0;
1360                         }
1361                         sbi->s_qf_names[qtype] = qname;
1362                         if (strchr(sbi->s_qf_names[qtype], '/')) {
1363                                 printk(KERN_ERR
1364                                         "EXT4-fs: quotafile must be on "
1365                                         "filesystem root.\n");
1366                                 kfree(sbi->s_qf_names[qtype]);
1367                                 sbi->s_qf_names[qtype] = NULL;
1368                                 return 0;
1369                         }
1370                         set_opt(sbi->s_mount_opt, QUOTA);
1371                         break;
1372                 case Opt_offusrjquota:
1373                         qtype = USRQUOTA;
1374                         goto clear_qf_name;
1375                 case Opt_offgrpjquota:
1376                         qtype = GRPQUOTA;
1377 clear_qf_name:
1378                         if (sb_any_quota_loaded(sb) &&
1379                             sbi->s_qf_names[qtype]) {
1380                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1381                                         "journaled quota options when "
1382                                         "quota turned on.\n");
1383                                 return 0;
1384                         }
1385                         /*
1386                          * The space will be released later when all options
1387                          * are confirmed to be correct
1388                          */
1389                         sbi->s_qf_names[qtype] = NULL;
1390                         break;
1391                 case Opt_jqfmt_vfsold:
1392                         qfmt = QFMT_VFS_OLD;
1393                         goto set_qf_format;
1394                 case Opt_jqfmt_vfsv0:
1395                         qfmt = QFMT_VFS_V0;
1396 set_qf_format:
1397                         if (sb_any_quota_loaded(sb) &&
1398                             sbi->s_jquota_fmt != qfmt) {
1399                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1400                                         "journaled quota options when "
1401                                         "quota turned on.\n");
1402                                 return 0;
1403                         }
1404                         sbi->s_jquota_fmt = qfmt;
1405                         break;
1406                 case Opt_quota:
1407                 case Opt_usrquota:
1408                         set_opt(sbi->s_mount_opt, QUOTA);
1409                         set_opt(sbi->s_mount_opt, USRQUOTA);
1410                         break;
1411                 case Opt_grpquota:
1412                         set_opt(sbi->s_mount_opt, QUOTA);
1413                         set_opt(sbi->s_mount_opt, GRPQUOTA);
1414                         break;
1415                 case Opt_noquota:
1416                         if (sb_any_quota_loaded(sb)) {
1417                                 printk(KERN_ERR "EXT4-fs: Cannot change quota "
1418                                         "options when quota turned on.\n");
1419                                 return 0;
1420                         }
1421                         clear_opt(sbi->s_mount_opt, QUOTA);
1422                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1423                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1424                         break;
1425 #else
1426                 case Opt_quota:
1427                 case Opt_usrquota:
1428                 case Opt_grpquota:
1429                         printk(KERN_ERR
1430                                 "EXT4-fs: quota options not supported.\n");
1431                         break;
1432                 case Opt_usrjquota:
1433                 case Opt_grpjquota:
1434                 case Opt_offusrjquota:
1435                 case Opt_offgrpjquota:
1436                 case Opt_jqfmt_vfsold:
1437                 case Opt_jqfmt_vfsv0:
1438                         printk(KERN_ERR
1439                                 "EXT4-fs: journaled quota options not "
1440                                 "supported.\n");
1441                         break;
1442                 case Opt_noquota:
1443                         break;
1444 #endif
1445                 case Opt_abort:
1446                         set_opt(sbi->s_mount_opt, ABORT);
1447                         break;
1448                 case Opt_nobarrier:
1449                         clear_opt(sbi->s_mount_opt, BARRIER);
1450                         break;
1451                 case Opt_barrier:
1452                         if (match_int(&args[0], &option)) {
1453                                 set_opt(sbi->s_mount_opt, BARRIER);
1454                                 break;
1455                         }
1456                         if (option)
1457                                 set_opt(sbi->s_mount_opt, BARRIER);
1458                         else
1459                                 clear_opt(sbi->s_mount_opt, BARRIER);
1460                         break;
1461                 case Opt_ignore:
1462                         break;
1463                 case Opt_resize:
1464                         if (!is_remount) {
1465                                 printk("EXT4-fs: resize option only available "
1466                                         "for remount\n");
1467                                 return 0;
1468                         }
1469                         if (match_int(&args[0], &option) != 0)
1470                                 return 0;
1471                         *n_blocks_count = option;
1472                         break;
1473                 case Opt_nobh:
1474                         set_opt(sbi->s_mount_opt, NOBH);
1475                         break;
1476                 case Opt_bh:
1477                         clear_opt(sbi->s_mount_opt, NOBH);
1478                         break;
1479                 case Opt_i_version:
1480                         set_opt(sbi->s_mount_opt, I_VERSION);
1481                         sb->s_flags |= MS_I_VERSION;
1482                         break;
1483                 case Opt_nodelalloc:
1484                         clear_opt(sbi->s_mount_opt, DELALLOC);
1485                         break;
1486                 case Opt_stripe:
1487                         if (match_int(&args[0], &option))
1488                                 return 0;
1489                         if (option < 0)
1490                                 return 0;
1491                         sbi->s_stripe = option;
1492                         break;
1493                 case Opt_delalloc:
1494                         set_opt(sbi->s_mount_opt, DELALLOC);
1495                         break;
1496                 case Opt_inode_readahead_blks:
1497                         if (match_int(&args[0], &option))
1498                                 return 0;
1499                         if (option < 0 || option > (1 << 30))
1500                                 return 0;
1501                         if (!is_power_of_2(option)) {
1502                                 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1503                                        " must be a power of 2\n");
1504                                 return 0;
1505                         }
1506                         sbi->s_inode_readahead_blks = option;
1507                         break;
1508                 case Opt_journal_ioprio:
1509                         if (match_int(&args[0], &option))
1510                                 return 0;
1511                         if (option < 0 || option > 7)
1512                                 break;
1513                         *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1514                                                             option);
1515                         break;
1516                 case Opt_noauto_da_alloc:
1517                         set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1518                         break;
1519                 case Opt_auto_da_alloc:
1520                         if (match_int(&args[0], &option)) {
1521                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1522                                 break;
1523                         }
1524                         if (option)
1525                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1526                         else
1527                                 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1528                         break;
1529                 default:
1530                         printk(KERN_ERR
1531                                "EXT4-fs: Unrecognized mount option \"%s\" "
1532                                "or missing value\n", p);
1533                         return 0;
1534                 }
1535         }
1536 #ifdef CONFIG_QUOTA
1537         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1538                 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1539                      sbi->s_qf_names[USRQUOTA])
1540                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1541
1542                 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1543                      sbi->s_qf_names[GRPQUOTA])
1544                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1545
1546                 if ((sbi->s_qf_names[USRQUOTA] &&
1547                                 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1548                     (sbi->s_qf_names[GRPQUOTA] &&
1549                                 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1550                         printk(KERN_ERR "EXT4-fs: old and new quota "
1551                                         "format mixing.\n");
1552                         return 0;
1553                 }
1554
1555                 if (!sbi->s_jquota_fmt) {
1556                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1557                                         "not specified.\n");
1558                         return 0;
1559                 }
1560         } else {
1561                 if (sbi->s_jquota_fmt) {
1562                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1563                                         "specified with no journaling "
1564                                         "enabled.\n");
1565                         return 0;
1566                 }
1567         }
1568 #endif
1569         return 1;
1570 }
1571
1572 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1573                             int read_only)
1574 {
1575         struct ext4_sb_info *sbi = EXT4_SB(sb);
1576         int res = 0;
1577
1578         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1579                 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1580                        "forcing read-only mode\n");
1581                 res = MS_RDONLY;
1582         }
1583         if (read_only)
1584                 return res;
1585         if (!(sbi->s_mount_state & EXT4_VALID_FS))
1586                 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1587                        "running e2fsck is recommended\n");
1588         else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1589                 printk(KERN_WARNING
1590                        "EXT4-fs warning: mounting fs with errors, "
1591                        "running e2fsck is recommended\n");
1592         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1593                  le16_to_cpu(es->s_mnt_count) >=
1594                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1595                 printk(KERN_WARNING
1596                        "EXT4-fs warning: maximal mount count reached, "
1597                        "running e2fsck is recommended\n");
1598         else if (le32_to_cpu(es->s_checkinterval) &&
1599                 (le32_to_cpu(es->s_lastcheck) +
1600                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1601                 printk(KERN_WARNING
1602                        "EXT4-fs warning: checktime reached, "
1603                        "running e2fsck is recommended\n");
1604         if (!sbi->s_journal) 
1605                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1606         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1607                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1608         le16_add_cpu(&es->s_mnt_count, 1);
1609         es->s_mtime = cpu_to_le32(get_seconds());
1610         ext4_update_dynamic_rev(sb);
1611         if (sbi->s_journal)
1612                 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1613
1614         ext4_commit_super(sb, 1);
1615         if (test_opt(sb, DEBUG))
1616                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1617                                 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1618                         sb->s_blocksize,
1619                         sbi->s_groups_count,
1620                         EXT4_BLOCKS_PER_GROUP(sb),
1621                         EXT4_INODES_PER_GROUP(sb),
1622                         sbi->s_mount_opt);
1623
1624         if (EXT4_SB(sb)->s_journal) {
1625                 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1626                        sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1627                        "external", EXT4_SB(sb)->s_journal->j_devname);
1628         } else {
1629                 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1630         }
1631         return res;
1632 }
1633
1634 static int ext4_fill_flex_info(struct super_block *sb)
1635 {
1636         struct ext4_sb_info *sbi = EXT4_SB(sb);
1637         struct ext4_group_desc *gdp = NULL;
1638         struct buffer_head *bh;
1639         ext4_group_t flex_group_count;
1640         ext4_group_t flex_group;
1641         int groups_per_flex = 0;
1642         size_t size;
1643         int i;
1644
1645         if (!sbi->s_es->s_log_groups_per_flex) {
1646                 sbi->s_log_groups_per_flex = 0;
1647                 return 1;
1648         }
1649
1650         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1651         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1652
1653         /* We allocate both existing and potentially added groups */
1654         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1655                         ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1656                               EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1657         size = flex_group_count * sizeof(struct flex_groups);
1658         sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1659         if (sbi->s_flex_groups == NULL) {
1660                 sbi->s_flex_groups = vmalloc(size);
1661                 if (sbi->s_flex_groups)
1662                         memset(sbi->s_flex_groups, 0, size);
1663         }
1664         if (sbi->s_flex_groups == NULL) {
1665                 printk(KERN_ERR "EXT4-fs: not enough memory for "
1666                                 "%u flex groups\n", flex_group_count);
1667                 goto failed;
1668         }
1669
1670         for (i = 0; i < sbi->s_groups_count; i++) {
1671                 gdp = ext4_get_group_desc(sb, i, &bh);
1672
1673                 flex_group = ext4_flex_group(sbi, i);
1674                 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1675                            ext4_free_inodes_count(sb, gdp));
1676                 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1677                            ext4_free_blks_count(sb, gdp));
1678                 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1679                            ext4_used_dirs_count(sb, gdp));
1680         }
1681
1682         return 1;
1683 failed:
1684         return 0;
1685 }
1686
1687 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1688                             struct ext4_group_desc *gdp)
1689 {
1690         __u16 crc = 0;
1691
1692         if (sbi->s_es->s_feature_ro_compat &
1693             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1694                 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1695                 __le32 le_group = cpu_to_le32(block_group);
1696
1697                 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1698                 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1699                 crc = crc16(crc, (__u8 *)gdp, offset);
1700                 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1701                 /* for checksum of struct ext4_group_desc do the rest...*/
1702                 if ((sbi->s_es->s_feature_incompat &
1703                      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1704                     offset < le16_to_cpu(sbi->s_es->s_desc_size))
1705                         crc = crc16(crc, (__u8 *)gdp + offset,
1706                                     le16_to_cpu(sbi->s_es->s_desc_size) -
1707                                         offset);
1708         }
1709
1710         return cpu_to_le16(crc);
1711 }
1712
1713 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1714                                 struct ext4_group_desc *gdp)
1715 {
1716         if ((sbi->s_es->s_feature_ro_compat &
1717              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1718             (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1719                 return 0;
1720
1721         return 1;
1722 }
1723
1724 /* Called at mount-time, super-block is locked */
1725 static int ext4_check_descriptors(struct super_block *sb)
1726 {
1727         struct ext4_sb_info *sbi = EXT4_SB(sb);
1728         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1729         ext4_fsblk_t last_block;
1730         ext4_fsblk_t block_bitmap;
1731         ext4_fsblk_t inode_bitmap;
1732         ext4_fsblk_t inode_table;
1733         int flexbg_flag = 0;
1734         ext4_group_t i;
1735
1736         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1737                 flexbg_flag = 1;
1738
1739         ext4_debug("Checking group descriptors");
1740
1741         for (i = 0; i < sbi->s_groups_count; i++) {
1742                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1743
1744                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1745                         last_block = ext4_blocks_count(sbi->s_es) - 1;
1746                 else
1747                         last_block = first_block +
1748                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1749
1750                 block_bitmap = ext4_block_bitmap(sb, gdp);
1751                 if (block_bitmap < first_block || block_bitmap > last_block) {
1752                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1753                                "Block bitmap for group %u not in group "
1754                                "(block %llu)!\n", i, block_bitmap);
1755                         return 0;
1756                 }
1757                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1758                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1759                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1760                                "Inode bitmap for group %u not in group "
1761                                "(block %llu)!\n", i, inode_bitmap);
1762                         return 0;
1763                 }
1764                 inode_table = ext4_inode_table(sb, gdp);
1765                 if (inode_table < first_block ||
1766                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
1767                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1768                                "Inode table for group %u not in group "
1769                                "(block %llu)!\n", i, inode_table);
1770                         return 0;
1771                 }
1772                 spin_lock(sb_bgl_lock(sbi, i));
1773                 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1774                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1775                                "Checksum for group %u failed (%u!=%u)\n",
1776                                i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1777                                gdp)), le16_to_cpu(gdp->bg_checksum));
1778                         if (!(sb->s_flags & MS_RDONLY)) {
1779                                 spin_unlock(sb_bgl_lock(sbi, i));
1780                                 return 0;
1781                         }
1782                 }
1783                 spin_unlock(sb_bgl_lock(sbi, i));
1784                 if (!flexbg_flag)
1785                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
1786         }
1787
1788         ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1789         sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1790         return 1;
1791 }
1792
1793 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1794  * the superblock) which were deleted from all directories, but held open by
1795  * a process at the time of a crash.  We walk the list and try to delete these
1796  * inodes at recovery time (only with a read-write filesystem).
1797  *
1798  * In order to keep the orphan inode chain consistent during traversal (in
1799  * case of crash during recovery), we link each inode into the superblock
1800  * orphan list_head and handle it the same way as an inode deletion during
1801  * normal operation (which journals the operations for us).
1802  *
1803  * We only do an iget() and an iput() on each inode, which is very safe if we
1804  * accidentally point at an in-use or already deleted inode.  The worst that
1805  * can happen in this case is that we get a "bit already cleared" message from
1806  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1807  * e2fsck was run on this filesystem, and it must have already done the orphan
1808  * inode cleanup for us, so we can safely abort without any further action.
1809  */
1810 static void ext4_orphan_cleanup(struct super_block *sb,
1811                                 struct ext4_super_block *es)
1812 {
1813         unsigned int s_flags = sb->s_flags;
1814         int nr_orphans = 0, nr_truncates = 0;
1815 #ifdef CONFIG_QUOTA
1816         int i;
1817 #endif
1818         if (!es->s_last_orphan) {
1819                 jbd_debug(4, "no orphan inodes to clean up\n");
1820                 return;
1821         }
1822
1823         if (bdev_read_only(sb->s_bdev)) {
1824                 printk(KERN_ERR "EXT4-fs: write access "
1825                         "unavailable, skipping orphan cleanup.\n");
1826                 return;
1827         }
1828
1829         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1830                 if (es->s_last_orphan)
1831                         jbd_debug(1, "Errors on filesystem, "
1832                                   "clearing orphan list.\n");
1833                 es->s_last_orphan = 0;
1834                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1835                 return;
1836         }
1837
1838         if (s_flags & MS_RDONLY) {
1839                 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1840                        sb->s_id);
1841                 sb->s_flags &= ~MS_RDONLY;
1842         }
1843 #ifdef CONFIG_QUOTA
1844         /* Needed for iput() to work correctly and not trash data */
1845         sb->s_flags |= MS_ACTIVE;
1846         /* Turn on quotas so that they are updated correctly */
1847         for (i = 0; i < MAXQUOTAS; i++) {
1848                 if (EXT4_SB(sb)->s_qf_names[i]) {
1849                         int ret = ext4_quota_on_mount(sb, i);
1850                         if (ret < 0)
1851                                 printk(KERN_ERR
1852                                         "EXT4-fs: Cannot turn on journaled "
1853                                         "quota: error %d\n", ret);
1854                 }
1855         }
1856 #endif
1857
1858         while (es->s_last_orphan) {
1859                 struct inode *inode;
1860
1861                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1862                 if (IS_ERR(inode)) {
1863                         es->s_last_orphan = 0;
1864                         break;
1865                 }
1866
1867                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1868                 vfs_dq_init(inode);
1869                 if (inode->i_nlink) {
1870                         printk(KERN_DEBUG
1871                                 "%s: truncating inode %lu to %lld bytes\n",
1872                                 __func__, inode->i_ino, inode->i_size);
1873                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1874                                   inode->i_ino, inode->i_size);
1875                         ext4_truncate(inode);
1876                         nr_truncates++;
1877                 } else {
1878                         printk(KERN_DEBUG
1879                                 "%s: deleting unreferenced inode %lu\n",
1880                                 __func__, inode->i_ino);
1881                         jbd_debug(2, "deleting unreferenced inode %lu\n",
1882                                   inode->i_ino);
1883                         nr_orphans++;
1884                 }
1885                 iput(inode);  /* The delete magic happens here! */
1886         }
1887
1888 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1889
1890         if (nr_orphans)
1891                 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1892                        sb->s_id, PLURAL(nr_orphans));
1893         if (nr_truncates)
1894                 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1895                        sb->s_id, PLURAL(nr_truncates));
1896 #ifdef CONFIG_QUOTA
1897         /* Turn quotas off */
1898         for (i = 0; i < MAXQUOTAS; i++) {
1899                 if (sb_dqopt(sb)->files[i])
1900                         vfs_quota_off(sb, i, 0);
1901         }
1902 #endif
1903         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1904 }
1905 /*
1906  * Maximal extent format file size.
1907  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1908  * extent format containers, within a sector_t, and within i_blocks
1909  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1910  * so that won't be a limiting factor.
1911  *
1912  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1913  */
1914 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1915 {
1916         loff_t res;
1917         loff_t upper_limit = MAX_LFS_FILESIZE;
1918
1919         /* small i_blocks in vfs inode? */
1920         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1921                 /*
1922                  * CONFIG_LBD is not enabled implies the inode
1923                  * i_block represent total blocks in 512 bytes
1924                  * 32 == size of vfs inode i_blocks * 8
1925                  */
1926                 upper_limit = (1LL << 32) - 1;
1927
1928                 /* total blocks in file system block size */
1929                 upper_limit >>= (blkbits - 9);
1930                 upper_limit <<= blkbits;
1931         }
1932
1933         /* 32-bit extent-start container, ee_block */
1934         res = 1LL << 32;
1935         res <<= blkbits;
1936         res -= 1;
1937
1938         /* Sanity check against vm- & vfs- imposed limits */
1939         if (res > upper_limit)
1940                 res = upper_limit;
1941
1942         return res;
1943 }
1944
1945 /*
1946  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1947  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1948  * We need to be 1 filesystem block less than the 2^48 sector limit.
1949  */
1950 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1951 {
1952         loff_t res = EXT4_NDIR_BLOCKS;
1953         int meta_blocks;
1954         loff_t upper_limit;
1955         /* This is calculated to be the largest file size for a
1956          * dense, bitmapped file such that the total number of
1957          * sectors in the file, including data and all indirect blocks,
1958          * does not exceed 2^48 -1
1959          * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1960          * total number of  512 bytes blocks of the file
1961          */
1962
1963         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1964                 /*
1965                  * !has_huge_files or CONFIG_LBD is not enabled
1966                  * implies the inode i_block represent total blocks in
1967                  * 512 bytes 32 == size of vfs inode i_blocks * 8
1968                  */
1969                 upper_limit = (1LL << 32) - 1;
1970
1971                 /* total blocks in file system block size */
1972                 upper_limit >>= (bits - 9);
1973
1974         } else {
1975                 /*
1976                  * We use 48 bit ext4_inode i_blocks
1977                  * With EXT4_HUGE_FILE_FL set the i_blocks
1978                  * represent total number of blocks in
1979                  * file system block size
1980                  */
1981                 upper_limit = (1LL << 48) - 1;
1982
1983         }
1984
1985         /* indirect blocks */
1986         meta_blocks = 1;
1987         /* double indirect blocks */
1988         meta_blocks += 1 + (1LL << (bits-2));
1989         /* tripple indirect blocks */
1990         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1991
1992         upper_limit -= meta_blocks;
1993         upper_limit <<= bits;
1994
1995         res += 1LL << (bits-2);
1996         res += 1LL << (2*(bits-2));
1997         res += 1LL << (3*(bits-2));
1998         res <<= bits;
1999         if (res > upper_limit)
2000                 res = upper_limit;
2001
2002         if (res > MAX_LFS_FILESIZE)
2003                 res = MAX_LFS_FILESIZE;
2004
2005         return res;
2006 }
2007
2008 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2009                                 ext4_fsblk_t logical_sb_block, int nr)
2010 {
2011         struct ext4_sb_info *sbi = EXT4_SB(sb);
2012         ext4_group_t bg, first_meta_bg;
2013         int has_super = 0;
2014
2015         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2016
2017         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2018             nr < first_meta_bg)
2019                 return logical_sb_block + nr + 1;
2020         bg = sbi->s_desc_per_block * nr;
2021         if (ext4_bg_has_super(sb, bg))
2022                 has_super = 1;
2023         return (has_super + ext4_group_first_block_no(sb, bg));
2024 }
2025
2026 /**
2027  * ext4_get_stripe_size: Get the stripe size.
2028  * @sbi: In memory super block info
2029  *
2030  * If we have specified it via mount option, then
2031  * use the mount option value. If the value specified at mount time is
2032  * greater than the blocks per group use the super block value.
2033  * If the super block value is greater than blocks per group return 0.
2034  * Allocator needs it be less than blocks per group.
2035  *
2036  */
2037 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2038 {
2039         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2040         unsigned long stripe_width =
2041                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2042
2043         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2044                 return sbi->s_stripe;
2045
2046         if (stripe_width <= sbi->s_blocks_per_group)
2047                 return stripe_width;
2048
2049         if (stride <= sbi->s_blocks_per_group)
2050                 return stride;
2051
2052         return 0;
2053 }
2054
2055 /* sysfs support */
2056
2057 struct ext4_attr {
2058         struct attribute attr;
2059         ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2060         ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, 
2061                          const char *, size_t);
2062         int offset;
2063 };
2064
/*
 * Parse an unsigned long from @buf, allowing surrounding whitespace.
 *
 * The number is parsed with base 0 (prefix decides the radix) and stored in
 * @value before validation.  Returns 0 on success, or -EINVAL if anything
 * other than whitespace follows the number or the value exceeds @max.
 */
static int parse_strtoul(const char *buf,
                unsigned long max, unsigned long *value)
{
        const char *start = buf;
        char *rest;

        for (; *start && isspace(*start); start++)
                ;

        *value = simple_strtoul(start, &rest, 0);

        for (; *rest && isspace(*rest); rest++)
                ;

        if (*rest)
                return -EINVAL;
        if (*value > max)
                return -EINVAL;

        return 0;
}
2080
2081 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2082                                               struct ext4_sb_info *sbi,
2083                                               char *buf)
2084 {
2085         return snprintf(buf, PAGE_SIZE, "%llu\n",
2086                         (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2087 }
2088
2089 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2090                                          struct ext4_sb_info *sbi, char *buf)
2091 {
2092         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2093
2094         return snprintf(buf, PAGE_SIZE, "%lu\n",
2095                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2096                          sbi->s_sectors_written_start) >> 1);
2097 }
2098
2099 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2100                                           struct ext4_sb_info *sbi, char *buf)
2101 {
2102         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2103
2104         return snprintf(buf, PAGE_SIZE, "%llu\n",
2105                         sbi->s_kbytes_written + 
2106                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2107                           EXT4_SB(sb)->s_sectors_written_start) >> 1));
2108 }
2109
2110 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2111                                           struct ext4_sb_info *sbi,
2112                                           const char *buf, size_t count)
2113 {
2114         unsigned long t;
2115
2116         if (parse_strtoul(buf, 0x40000000, &t))
2117                 return -EINVAL;
2118
2119         if (!is_power_of_2(t))
2120                 return -EINVAL;
2121
2122         sbi->s_inode_readahead_blks = t;
2123         return count;
2124 }
2125
2126 static ssize_t sbi_ui_show(struct ext4_attr *a,
2127                                 struct ext4_sb_info *sbi, char *buf)
2128 {
2129         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2130
2131         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2132 }
2133
2134 static ssize_t sbi_ui_store(struct ext4_attr *a,
2135                             struct ext4_sb_info *sbi,
2136                             const char *buf, size_t count)
2137 {
2138         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2139         unsigned long t;
2140
2141         if (parse_strtoul(buf, 0xffffffff, &t))
2142                 return -EINVAL;
2143         *ui = t;
2144         return count;
2145 }
2146
/*
 * Helpers for declaring ext4 sysfs attributes.  Each one defines a static
 * struct ext4_attr named ext4_attr_<name>.
 */

/* Attribute backed by the ext4_sb_info member _elname (offset recorded). */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname)              \
static struct ext4_attr ext4_attr_##_name = {                           \
        .attr = {.name = __stringify(_name), .mode = _mode },           \
        .show   = _show,                                                \
        .store  = _store,                                               \
        .offset = offsetof(struct ext4_sb_info, _elname),               \
}

/* Attribute with explicit show/store callbacks and no recorded offset. */
#define EXT4_ATTR(name, mode, show, store)                              \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Read-only (0444) attribute served by <name>_show. */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)

/* Read-write (0644) attribute served by <name>_show / <name>_store. */
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)

/* Read-write (0644) unsigned int member handled by sbi_ui_show/store. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)                               \
        EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)

/* Pointer to the embedded struct attribute, for attribute arrays. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
2162
2163 EXT4_RO_ATTR(delayed_allocation_blocks);
2164 EXT4_RO_ATTR(session_write_kbytes);
2165 EXT4_RO_ATTR(lifetime_write_kbytes);
2166 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2167                  inode_readahead_blks_store, s_inode_readahead_blks);
2168 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2169 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2170 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2171 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2172 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2173 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2174
2175 static struct attribute *ext4_attrs[] = {
2176         ATTR_LIST(delayed_allocation_blocks),
2177         ATTR_LIST(session_write_kbytes),
2178         ATTR_LIST(lifetime_write_kbytes),
2179         ATTR_LIST(inode_readahead_blks),
2180         ATTR_LIST(mb_stats),
2181         ATTR_LIST(mb_max_to_scan),
2182         ATTR_LIST(mb_min_to_scan),
2183         ATTR_LIST(mb_order2_req),
2184         ATTR_LIST(mb_stream_req),
2185         ATTR_LIST(mb_group_prealloc),
2186         NULL,
2187 };
2188
2189 static ssize_t ext4_attr_show(struct kobject *kobj,
2190                               struct attribute *attr, char *buf)
2191 {
2192         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2193                                                 s_kobj);
2194         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2195
2196         return a->show ? a->show(a, sbi, buf) : 0;
2197 }
2198
2199 static ssize_t ext4_attr_store(struct kobject *kobj,
2200                                struct attribute *attr,
2201                                const char *buf, size_t len)
2202 {
2203         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2204                                                 s_kobj);
2205         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2206
2207         return a->store ? a->store(a, sbi, buf, len) : 0;
2208 }
2209
2210 static void ext4_sb_release(struct kobject *kobj)
2211 {
2212         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2213                                                 s_kobj);
2214         complete(&sbi->s_kobj_unregister);
2215 }
2216
2217
2218 static struct sysfs_ops ext4_attr_ops = {
2219         .show   = ext4_attr_show,
2220         .store  = ext4_attr_store,
2221 };
2222
2223 static struct kobj_type ext4_ktype = {
2224         .default_attrs  = ext4_attrs,
2225         .sysfs_ops      = &ext4_attr_ops,
2226         .release        = ext4_sb_release,
2227 };
2228
2229 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2230                                 __releases(kernel_lock)
2231                                 __acquires(kernel_lock)
2232
2233 {
2234         struct buffer_head *bh;
2235         struct ext4_super_block *es = NULL;
2236         struct ext4_sb_info *sbi;
2237         ext4_fsblk_t block;
2238         ext4_fsblk_t sb_block = get_sb_block(&data);
2239         ext4_fsblk_t logical_sb_block;
2240         unsigned long offset = 0;
2241         unsigned long journal_devnum = 0;
2242         unsigned long def_mount_opts;
2243         struct inode *root;
2244         char *cp;
2245         const char *descr;
2246         int ret = -EINVAL;
2247         int blocksize;
2248         unsigned int db_count;
2249         unsigned int i;
2250         int needs_recovery, has_huge_files;
2251         int features;
2252         __u64 blocks_count;
2253         int err;
2254         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2255
2256         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2257         if (!sbi)
2258                 return -ENOMEM;
2259
2260         sbi->s_blockgroup_lock =
2261                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2262         if (!sbi->s_blockgroup_lock) {
2263                 kfree(sbi);
2264                 return -ENOMEM;
2265         }
2266         sb->s_fs_info = sbi;
2267         sbi->s_mount_opt = 0;
2268         sbi->s_resuid = EXT4_DEF_RESUID;
2269         sbi->s_resgid = EXT4_DEF_RESGID;
2270         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2271         sbi->s_sb_block = sb_block;
2272         sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2273                                                       sectors[1]);
2274
2275         unlock_kernel();
2276
2277         /* Cleanup superblock name */
2278         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2279                 *cp = '!';
2280
2281         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2282         if (!blocksize) {
2283                 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2284                 goto out_fail;
2285         }
2286
2287         /*
2288          * The ext4 superblock will not be buffer aligned for other than 1kB
2289          * block sizes.  We need to calculate the offset from buffer start.
2290          */
2291         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2292                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2293                 offset = do_div(logical_sb_block, blocksize);
2294         } else {
2295                 logical_sb_block = sb_block;
2296         }
2297
2298         if (!(bh = sb_bread(sb, logical_sb_block))) {
2299                 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2300                 goto out_fail;
2301         }
2302         /*
2303          * Note: s_es must be initialized as soon as possible because
2304          *       some ext4 macro-instructions depend on its value
2305          */
2306         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2307         sbi->s_es = es;
2308         sb->s_magic = le16_to_cpu(es->s_magic);
2309         if (sb->s_magic != EXT4_SUPER_MAGIC)
2310                 goto cantfind_ext4;
2311         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2312
2313         /* Set defaults before we parse the mount options */
2314         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2315         if (def_mount_opts & EXT4_DEFM_DEBUG)
2316                 set_opt(sbi->s_mount_opt, DEBUG);
2317         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2318                 set_opt(sbi->s_mount_opt, GRPID);
2319         if (def_mount_opts & EXT4_DEFM_UID16)
2320                 set_opt(sbi->s_mount_opt, NO_UID32);
2321 #ifdef CONFIG_EXT4_FS_XATTR
2322         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2323                 set_opt(sbi->s_mount_opt, XATTR_USER);
2324 #endif
2325 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2326         if (def_mount_opts & EXT4_DEFM_ACL)
2327                 set_opt(sbi->s_mount_opt, POSIX_ACL);
2328 #endif
2329         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2330                 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2331         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2332                 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2333         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2334                 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2335
2336         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2337                 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2338         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2339                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2340         else
2341                 set_opt(sbi->s_mount_opt, ERRORS_RO);
2342
2343         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2344         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2345         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2346         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2347         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2348
2349         set_opt(sbi->s_mount_opt, BARRIER);
2350
2351         /*
2352          * enable delayed allocation by default
2353          * Use -o nodelalloc to turn it off
2354          */
2355         set_opt(sbi->s_mount_opt, DELALLOC);
2356
2357
2358         if (!parse_options((char *) data, sb, &journal_devnum,
2359                            &journal_ioprio, NULL, 0))
2360                 goto failed_mount;
2361
2362         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2363                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2364
2365         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2366             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2367              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2368              EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2369                 printk(KERN_WARNING
2370                        "EXT4-fs warning: feature flags set on rev 0 fs, "
2371                        "running e2fsck is recommended\n");
2372
2373         /*
2374          * Check feature flags regardless of the revision level, since we
2375          * previously didn't change the revision level when setting the flags,
2376          * so there is a chance incompat flags are set on a rev 0 filesystem.
2377          */
2378         features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2379         if (features) {
2380                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2381                        "unsupported optional features (%x).\n", sb->s_id,
2382                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2383                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2384                 goto failed_mount;
2385         }
2386         features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2387         if (!(sb->s_flags & MS_RDONLY) && features) {
2388                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2389                        "unsupported optional features (%x).\n", sb->s_id,
2390                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2391                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
2392                 goto failed_mount;
2393         }
2394         has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2395                                     EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2396         if (has_huge_files) {
2397                 /*
2398                  * Large file size enabled file system can only be
2399                  * mount if kernel is build with CONFIG_LBD
2400                  */
2401                 if (sizeof(root->i_blocks) < sizeof(u64) &&
2402                                 !(sb->s_flags & MS_RDONLY)) {
2403                         printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2404                                         "files cannot be mounted read-write "
2405                                         "without CONFIG_LBD.\n", sb->s_id);
2406                         goto failed_mount;
2407                 }
2408         }
2409         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2410
2411         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2412             blocksize > EXT4_MAX_BLOCK_SIZE) {
2413                 printk(KERN_ERR
2414                        "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2415                        blocksize, sb->s_id);
2416                 goto failed_mount;
2417         }
2418
2419         if (sb->s_blocksize != blocksize) {
2420
2421                 /* Validate the filesystem blocksize */
2422                 if (!sb_set_blocksize(sb, blocksize)) {
2423                         printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2424                                         blocksize);
2425                         goto failed_mount;
2426                 }
2427
2428                 brelse(bh);
2429                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2430                 offset = do_div(logical_sb_block, blocksize);
2431                 bh = sb_bread(sb, logical_sb_block);
2432                 if (!bh) {
2433                         printk(KERN_ERR
2434                                "EXT4-fs: Can't read superblock on 2nd try.\n");
2435                         goto failed_mount;
2436                 }
2437                 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2438                 sbi->s_es = es;
2439                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2440                         printk(KERN_ERR
2441                                "EXT4-fs: Magic mismatch, very weird !\n");
2442                         goto failed_mount;
2443                 }
2444         }
2445
2446         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2447                                                       has_huge_files);
2448         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2449
2450         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2451                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2452                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2453         } else {
2454                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2455                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2456                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2457                     (!is_power_of_2(sbi->s_inode_size)) ||
2458                     (sbi->s_inode_size > blocksize)) {
2459                         printk(KERN_ERR
2460                                "EXT4-fs: unsupported inode size: %d\n",
2461                                sbi->s_inode_size);
2462                         goto failed_mount;
2463                 }
2464                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2465                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2466         }
2467         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2468         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2469                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2470                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2471                     !is_power_of_2(sbi->s_desc_size)) {
2472                         printk(KERN_ERR
2473                                "EXT4-fs: unsupported descriptor size %lu\n",
2474                                sbi->s_desc_size);
2475                         goto failed_mount;
2476                 }
2477         } else
2478                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2479         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2480         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2481         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2482                 goto cantfind_ext4;
2483         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2484         if (sbi->s_inodes_per_block == 0)
2485                 goto cantfind_ext4;
2486         sbi->s_itb_per_group = sbi->s_inodes_per_group /
2487                                         sbi->s_inodes_per_block;
2488         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2489         sbi->s_sbh = bh;
2490         sbi->s_mount_state = le16_to_cpu(es->s_state);
2491         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2492         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2493         for (i = 0; i < 4; i++)
2494                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2495         sbi->s_def_hash_version = es->s_def_hash_version;
2496         i = le32_to_cpu(es->s_flags);
2497         if (i & EXT2_FLAGS_UNSIGNED_HASH)
2498                 sbi->s_hash_unsigned = 3;
2499         else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2500 #ifdef __CHAR_UNSIGNED__
2501                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2502                 sbi->s_hash_unsigned = 3;
2503 #else
2504                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2505 #endif
2506                 sb->s_dirt = 1;
2507         }
2508
2509         if (sbi->s_blocks_per_group > blocksize * 8) {
2510                 printk(KERN_ERR
2511                        "EXT4-fs: #blocks per group too big: %lu\n",
2512                        sbi->s_blocks_per_group);
2513                 goto failed_mount;
2514         }
2515         if (sbi->s_inodes_per_group > blocksize * 8) {
2516                 printk(KERN_ERR
2517                        "EXT4-fs: #inodes per group too big: %lu\n",
2518                        sbi->s_inodes_per_group);
2519                 goto failed_mount;
2520         }
2521
2522         if (ext4_blocks_count(es) >
2523                     (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2524                 printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2525                         " too large to mount safely\n", sb->s_id);
2526                 if (sizeof(sector_t) < 8)
2527                         printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2528                                         "enabled\n");
2529                 goto failed_mount;
2530         }
2531
2532         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2533                 goto cantfind_ext4;
2534
2535         /* check blocks count against device size */
2536         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2537         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2538                 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2539                        "exceeds size of device (%llu blocks)\n",
2540                        ext4_blocks_count(es), blocks_count);
2541                 goto failed_mount;
2542         }
2543
2544         /*
2545          * It makes no sense for the first data block to be beyond the end
2546          * of the filesystem.
2547          */
2548         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2549                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2550                        "block %u is beyond end of filesystem (%llu)\n",
2551                        le32_to_cpu(es->s_first_data_block),
2552                        ext4_blocks_count(es));
2553                 goto failed_mount;
2554         }
2555         blocks_count = (ext4_blocks_count(es) -
2556                         le32_to_cpu(es->s_first_data_block) +
2557                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
2558         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2559         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2560                 printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2561                        "(block count %llu, first data block %u, "
2562                        "blocks per group %lu)\n", sbi->s_groups_count,
2563                        ext4_blocks_count(es),
2564                        le32_to_cpu(es->s_first_data_block),
2565                        EXT4_BLOCKS_PER_GROUP(sb));
2566                 goto failed_mount;
2567         }
2568         sbi->s_groups_count = blocks_count;
2569         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2570                    EXT4_DESC_PER_BLOCK(sb);
2571         sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2572                                     GFP_KERNEL);
2573         if (sbi->s_group_desc == NULL) {
2574                 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2575                 goto failed_mount;
2576         }
2577
2578 #ifdef CONFIG_PROC_FS
2579         if (ext4_proc_root)
2580                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2581 #endif
2582
2583         bgl_lock_init(sbi->s_blockgroup_lock);
2584
2585         for (i = 0; i < db_count; i++) {
2586                 block = descriptor_loc(sb, logical_sb_block, i);
2587                 sbi->s_group_desc[i] = sb_bread(sb, block);
2588                 if (!sbi->s_group_desc[i]) {
2589                         printk(KERN_ERR "EXT4-fs: "
2590                                "can't read group descriptor %d\n", i);
2591                         db_count = i;
2592                         goto failed_mount2;
2593                 }
2594         }
2595         if (!ext4_check_descriptors(sb)) {
2596                 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2597                 goto failed_mount2;
2598         }
2599         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2600                 if (!ext4_fill_flex_info(sb)) {
2601                         printk(KERN_ERR
2602                                "EXT4-fs: unable to initialize "
2603                                "flex_bg meta info!\n");
2604                         goto failed_mount2;
2605                 }
2606
2607         sbi->s_gdb_count = db_count;
2608         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2609         spin_lock_init(&sbi->s_next_gen_lock);
2610
2611         err = percpu_counter_init(&sbi->s_freeblocks_counter,
2612                         ext4_count_free_blocks(sb));
2613         if (!err) {
2614                 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2615                                 ext4_count_free_inodes(sb));
2616         }
2617         if (!err) {
2618                 err = percpu_counter_init(&sbi->s_dirs_counter,
2619                                 ext4_count_dirs(sb));
2620         }
2621         if (!err) {
2622                 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2623         }
2624         if (err) {
2625                 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2626                 goto failed_mount3;
2627         }
2628
2629         sbi->s_stripe = ext4_get_stripe_size(sbi);
2630
2631         /*
2632          * set up enough so that it can read an inode
2633          */
2634         if (!test_opt(sb, NOLOAD) &&
2635             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2636                 sb->s_op = &ext4_sops;
2637         else
2638                 sb->s_op = &ext4_nojournal_sops;
2639         sb->s_export_op = &ext4_export_ops;
2640         sb->s_xattr = ext4_xattr_handlers;
2641 #ifdef CONFIG_QUOTA
2642         sb->s_qcop = &ext4_qctl_operations;
2643         sb->dq_op = &ext4_quota_operations;
2644 #endif
2645         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2646         mutex_init(&sbi->s_orphan_lock);
2647         mutex_init(&sbi->s_resize_lock);
2648
2649         sb->s_root = NULL;
2650
2651         needs_recovery = (es->s_last_orphan != 0 ||
2652                           EXT4_HAS_INCOMPAT_FEATURE(sb,
2653                                     EXT4_FEATURE_INCOMPAT_RECOVER));
2654
2655         /*
2656          * The first inode we look at is the journal inode.  Don't try
2657          * root first: it may be modified in the journal!
2658          */
2659         if (!test_opt(sb, NOLOAD) &&
2660             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2661                 if (ext4_load_journal(sb, es, journal_devnum))
2662                         goto failed_mount3;
2663                 if (!(sb->s_flags & MS_RDONLY) &&
2664                     EXT4_SB(sb)->s_journal->j_failed_commit) {
2665                         printk(KERN_CRIT "EXT4-fs error (device %s): "
2666                                "ext4_fill_super: Journal transaction "
2667                                "%u is corrupt\n", sb->s_id,
2668                                EXT4_SB(sb)->s_journal->j_failed_commit);
2669                         if (test_opt(sb, ERRORS_RO)) {
2670                                 printk(KERN_CRIT
2671                                        "Mounting filesystem read-only\n");
2672                                 sb->s_flags |= MS_RDONLY;
2673                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2674                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2675                         }
2676                         if (test_opt(sb, ERRORS_PANIC)) {
2677                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2678                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2679                                 ext4_commit_super(sb, 1);
2680                                 goto failed_mount4;
2681                         }
2682                 }
2683         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2684               EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2685                 printk(KERN_ERR "EXT4-fs: required journal recovery "
2686                        "suppressed and not mounted read-only\n");
2687                 goto failed_mount4;
2688         } else {
2689                 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2690                 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2691                 sbi->s_journal = NULL;
2692                 needs_recovery = 0;
2693                 goto no_journal;
2694         }
2695
2696         if (ext4_blocks_count(es) > 0xffffffffULL &&
2697             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2698                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
2699                 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2700                 goto failed_mount4;
2701         }
2702
2703         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2704                 jbd2_journal_set_features(sbi->s_journal,
2705                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2706                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2707         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2708                 jbd2_journal_set_features(sbi->s_journal,
2709                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2710                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2711                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2712         } else {
2713                 jbd2_journal_clear_features(sbi->s_journal,
2714                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2715                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2716         }
2717
2718         /* We have now updated the journal if required, so we can
2719          * validate the data journaling mode. */
2720         switch (test_opt(sb, DATA_FLAGS)) {
2721         case 0:
2722                 /* No mode set, assume a default based on the journal
2723                  * capabilities: ORDERED_DATA if the journal can
2724                  * cope, else JOURNAL_DATA
2725                  */
2726                 if (jbd2_journal_check_available_features
2727                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2728                         set_opt(sbi->s_mount_opt, ORDERED_DATA);
2729                 else
2730                         set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2731                 break;
2732
2733         case EXT4_MOUNT_ORDERED_DATA:
2734         case EXT4_MOUNT_WRITEBACK_DATA:
2735                 if (!jbd2_journal_check_available_features
2736                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2737                         printk(KERN_ERR "EXT4-fs: Journal does not support "
2738                                "requested data journaling mode\n");
2739                         goto failed_mount4;
2740                 }
2741         default:
2742                 break;
2743         }
2744         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2745
2746 no_journal:
2747
2748         if (test_opt(sb, NOBH)) {
2749                 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2750                         printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2751                                 "its supported only with writeback mode\n");
2752                         clear_opt(sbi->s_mount_opt, NOBH);
2753                 }
2754         }
2755         /*
2756          * The jbd2_journal_load will have done any necessary log recovery,
2757          * so we can safely mount the rest of the filesystem now.
2758          */
2759
2760         root = ext4_iget(sb, EXT4_ROOT_INO);
2761         if (IS_ERR(root)) {
2762                 printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2763                 ret = PTR_ERR(root);
2764                 goto failed_mount4;
2765         }
2766         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2767                 iput(root);
2768                 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2769                 goto failed_mount4;
2770         }
2771         sb->s_root = d_alloc_root(root);
2772         if (!sb->s_root) {
2773                 printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2774                 iput(root);
2775                 ret = -ENOMEM;
2776                 goto failed_mount4;
2777         }
2778
2779         ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2780
2781         /* determine the minimum size of new large inodes, if present */
2782         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2783                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2784                                                      EXT4_GOOD_OLD_INODE_SIZE;
2785                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2786                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2787                         if (sbi->s_want_extra_isize <
2788                             le16_to_cpu(es->s_want_extra_isize))
2789                                 sbi->s_want_extra_isize =
2790                                         le16_to_cpu(es->s_want_extra_isize);
2791                         if (sbi->s_want_extra_isize <
2792                             le16_to_cpu(es->s_min_extra_isize))
2793                                 sbi->s_want_extra_isize =
2794                                         le16_to_cpu(es->s_min_extra_isize);
2795                 }
2796         }
2797         /* Check if enough inode space is available */
2798         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2799                                                         sbi->s_inode_size) {
2800                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2801                                                        EXT4_GOOD_OLD_INODE_SIZE;
2802                 printk(KERN_INFO "EXT4-fs: required extra inode space not"
2803                         "available.\n");
2804         }
2805
2806         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2807                 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2808                                 "requested data journaling mode\n");
2809                 clear_opt(sbi->s_mount_opt, DELALLOC);
2810         } else if (test_opt(sb, DELALLOC))
2811                 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2812
2813         ext4_ext_init(sb);
2814         err = ext4_mb_init(sb, needs_recovery);
2815         if (err) {
2816                 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2817                        err);
2818                 goto failed_mount4;
2819         }
2820
2821         sbi->s_kobj.kset = ext4_kset;
2822         init_completion(&sbi->s_kobj_unregister);
2823         err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2824                                    "%s", sb->s_id);
2825         if (err) {
2826                 ext4_mb_release(sb);
2827                 ext4_ext_release(sb);
2828                 goto failed_mount4;
2829         };
2830
2831         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2832         ext4_orphan_cleanup(sb, es);
2833         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2834         if (needs_recovery) {
2835                 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2836                 ext4_mark_recovery_complete(sb, es);
2837         }
2838         if (EXT4_SB(sb)->s_journal) {
2839                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2840                         descr = " journalled data mode";
2841                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2842                         descr = " ordered data mode";
2843                 else
2844                         descr = " writeback data mode";
2845         } else
2846                 descr = "out journal";
2847
2848         printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2849                sb->s_id, descr);
2850
2851         lock_kernel();
2852         return 0;
2853
2854 cantfind_ext4:
2855         if (!silent)
2856                 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2857                        sb->s_id);
2858         goto failed_mount;
2859
2860 failed_mount4:
2861         printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2862         if (sbi->s_journal) {
2863                 jbd2_journal_destroy(sbi->s_journal);
2864                 sbi->s_journal = NULL;
2865         }
2866 failed_mount3:
2867         if (sbi->s_flex_groups) {
2868                 if (is_vmalloc_addr(sbi->s_flex_groups))
2869                         vfree(sbi->s_flex_groups);
2870                 else
2871                         kfree(sbi->s_flex_groups);
2872         }
2873         percpu_counter_destroy(&sbi->s_freeblocks_counter);
2874         percpu_counter_destroy(&sbi->s_freeinodes_counter);
2875         percpu_counter_destroy(&sbi->s_dirs_counter);
2876         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2877 failed_mount2:
2878         for (i = 0; i < db_count; i++)
2879                 brelse(sbi->s_group_desc[i]);
2880         kfree(sbi->s_group_desc);
2881 failed_mount:
2882         if (sbi->s_proc) {
2883                 remove_proc_entry(sb->s_id, ext4_proc_root);
2884         }
2885 #ifdef CONFIG_QUOTA
2886         for (i = 0; i < MAXQUOTAS; i++)
2887                 kfree(sbi->s_qf_names[i]);
2888 #endif
2889         ext4_blkdev_remove(sbi);
2890         brelse(bh);
2891 out_fail:
2892         sb->s_fs_info = NULL;
2893         kfree(sbi);
2894         lock_kernel();
2895         return ret;
2896 }
2897
2898 /*
2899  * Setup any per-fs journal parameters now.  We'll do this both on
2900  * initial mount, once the journal has been initialised but before we've
2901  * done any recovery; and again on any subsequent remount.
2902  */
2903 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2904 {
2905         struct ext4_sb_info *sbi = EXT4_SB(sb);
2906
2907         journal->j_commit_interval = sbi->s_commit_interval;
2908         journal->j_min_batch_time = sbi->s_min_batch_time;
2909         journal->j_max_batch_time = sbi->s_max_batch_time;
2910
2911         spin_lock(&journal->j_state_lock);
2912         if (test_opt(sb, BARRIER))
2913                 journal->j_flags |= JBD2_BARRIER;
2914         else
2915                 journal->j_flags &= ~JBD2_BARRIER;
2916         if (test_opt(sb, DATA_ERR_ABORT))
2917                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2918         else
2919                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2920         spin_unlock(&journal->j_state_lock);
2921 }
2922
2923 static journal_t *ext4_get_journal(struct super_block *sb,
2924                                    unsigned int journal_inum)
2925 {
2926         struct inode *journal_inode;
2927         journal_t *journal;
2928
2929         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2930
2931         /* First, test for the existence of a valid inode on disk.  Bad
2932          * things happen if we iget() an unused inode, as the subsequent
2933          * iput() will try to delete it. */
2934
2935         journal_inode = ext4_iget(sb, journal_inum);
2936         if (IS_ERR(journal_inode)) {
2937                 printk(KERN_ERR "EXT4-fs: no journal found.\n");
2938                 return NULL;
2939         }
2940         if (!journal_inode->i_nlink) {
2941                 make_bad_inode(journal_inode);
2942                 iput(journal_inode);
2943                 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2944                 return NULL;
2945         }
2946
2947         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2948                   journal_inode, journal_inode->i_size);
2949         if (!S_ISREG(journal_inode->i_mode)) {
2950                 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2951                 iput(journal_inode);
2952                 return NULL;
2953         }
2954
2955         journal = jbd2_journal_init_inode(journal_inode);
2956         if (!journal) {
2957                 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2958                 iput(journal_inode);
2959                 return NULL;
2960         }
2961         journal->j_private = sb;
2962         ext4_init_journal_params(sb, journal);
2963         return journal;
2964 }
2965
2966 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2967                                        dev_t j_dev)
2968 {
2969         struct buffer_head *bh;
2970         journal_t *journal;
2971         ext4_fsblk_t start;
2972         ext4_fsblk_t len;
2973         int hblock, blocksize;
2974         ext4_fsblk_t sb_block;
2975         unsigned long offset;
2976         struct ext4_super_block *es;
2977         struct block_device *bdev;
2978
2979         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2980
2981         bdev = ext4_blkdev_get(j_dev);
2982         if (bdev == NULL)
2983                 return NULL;
2984
2985         if (bd_claim(bdev, sb)) {
2986                 printk(KERN_ERR
2987                         "EXT4-fs: failed to claim external journal device.\n");
2988                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2989                 return NULL;
2990         }
2991
2992         blocksize = sb->s_blocksize;
2993         hblock = bdev_hardsect_size(bdev);
2994         if (blocksize < hblock) {
2995                 printk(KERN_ERR
2996                         "EXT4-fs: blocksize too small for journal device.\n");
2997                 goto out_bdev;
2998         }
2999
3000         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3001         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3002         set_blocksize(bdev, blocksize);
3003         if (!(bh = __bread(bdev, sb_block, blocksize))) {
3004                 printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
3005                        "external journal\n");
3006                 goto out_bdev;
3007         }
3008
3009         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3010         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3011             !(le32_to_cpu(es->s_feature_incompat) &
3012               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3013                 printk(KERN_ERR "EXT4-fs: external journal has "
3014                                         "bad superblock\n");
3015                 brelse(bh);
3016                 goto out_bdev;
3017         }
3018
3019         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3020                 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
3021                 brelse(bh);
3022                 goto out_bdev;
3023         }
3024
3025         len = ext4_blocks_count(es);
3026         start = sb_block + 1;
3027         brelse(bh);     /* we're done with the superblock */
3028
3029         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3030                                         start, len, blocksize);
3031         if (!journal) {
3032                 printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
3033                 goto out_bdev;
3034         }
3035         journal->j_private = sb;
3036         ll_rw_block(READ, 1, &journal->j_sb_buffer);
3037         wait_on_buffer(journal->j_sb_buffer);
3038         if (!buffer_uptodate(journal->j_sb_buffer)) {
3039                 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
3040                 goto out_journal;
3041         }
3042         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3043                 printk(KERN_ERR "EXT4-fs: External journal has more than one "
3044                                         "user (unsupported) - %d\n",
3045                         be32_to_cpu(journal->j_superblock->s_nr_users));
3046                 goto out_journal;
3047         }
3048         EXT4_SB(sb)->journal_bdev = bdev;
3049         ext4_init_journal_params(sb, journal);
3050         return journal;
3051 out_journal:
3052         jbd2_journal_destroy(journal);
3053 out_bdev:
3054         ext4_blkdev_put(bdev);
3055         return NULL;
3056 }
3057
3058 static int ext4_load_journal(struct super_block *sb,
3059                              struct ext4_super_block *es,
3060                              unsigned long journal_devnum)
3061 {
3062         journal_t *journal;
3063         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3064         dev_t journal_dev;
3065         int err = 0;
3066         int really_read_only;
3067
3068         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3069
3070         if (journal_devnum &&
3071             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3072                 printk(KERN_INFO "EXT4-fs: external journal device major/minor "
3073                         "numbers have changed\n");
3074                 journal_dev = new_decode_dev(journal_devnum);
3075         } else
3076                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3077
3078         really_read_only = bdev_read_only(sb->s_bdev);
3079
3080         /*
3081          * Are we loading a blank journal or performing recovery after a
3082          * crash?  For recovery, we need to check in advance whether we
3083          * can get read-write access to the device.
3084          */
3085
3086         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3087                 if (sb->s_flags & MS_RDONLY) {
3088                         printk(KERN_INFO "EXT4-fs: INFO: recovery "
3089                                         "required on readonly filesystem.\n");
3090                         if (really_read_only) {
3091                                 printk(KERN_ERR "EXT4-fs: write access "
3092                                         "unavailable, cannot proceed.\n");
3093                                 return -EROFS;
3094                         }
3095                         printk(KERN_INFO "EXT4-fs: write access will "
3096                                "be enabled during recovery.\n");
3097                 }
3098         }
3099
3100         if (journal_inum && journal_dev) {
3101                 printk(KERN_ERR "EXT4-fs: filesystem has both journal "
3102                        "and inode journals!\n");
3103                 return -EINVAL;
3104         }
3105
3106         if (journal_inum) {
3107                 if (!(journal = ext4_get_journal(sb, journal_inum)))
3108                         return -EINVAL;
3109         } else {
3110                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3111                         return -EINVAL;
3112         }
3113
3114         if (journal->j_flags & JBD2_BARRIER)
3115                 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
3116         else
3117                 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
3118
3119         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3120                 err = jbd2_journal_update_format(journal);
3121                 if (err)  {
3122                         printk(KERN_ERR "EXT4-fs: error updating journal.\n");
3123                         jbd2_journal_destroy(journal);
3124                         return err;
3125                 }
3126         }
3127
3128         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3129                 err = jbd2_journal_wipe(journal, !really_read_only);
3130         if (!err)
3131                 err = jbd2_journal_load(journal);
3132
3133         if (err) {
3134                 printk(KERN_ERR "EXT4-fs: error loading journal.\n");
3135                 jbd2_journal_destroy(journal);
3136                 return err;
3137         }
3138
3139         EXT4_SB(sb)->s_journal = journal;
3140         ext4_clear_journal_err(sb, es);
3141
3142         if (journal_devnum &&
3143             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3144                 es->s_journal_dev = cpu_to_le32(journal_devnum);
3145
3146                 /* Make sure we flush the recovery flag to disk. */
3147                 ext4_commit_super(sb, 1);
3148         }
3149
3150         return 0;
3151 }
3152
3153 static int ext4_commit_super(struct super_block *sb, int sync)
3154 {
3155         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3156         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3157         int error = 0;
3158
3159         if (!sbh)
3160                 return error;
3161         if (buffer_write_io_error(sbh)) {
3162                 /*
3163                  * Oh, dear.  A previous attempt to write the
3164                  * superblock failed.  This could happen because the
3165                  * USB device was yanked out.  Or it could happen to
3166                  * be a transient write error and maybe the block will
3167                  * be remapped.  Nothing we can do but to retry the
3168                  * write and hope for the best.
3169                  */
3170                 printk(KERN_ERR "EXT4-fs: previous I/O error to "
3171                        "superblock detected for %s.\n", sb->s_id);
3172                 clear_buffer_write_io_error(sbh);
3173                 set_buffer_uptodate(sbh);
3174         }
3175         es->s_wtime = cpu_to_le32(get_seconds());
3176         es->s_kbytes_written =
3177                 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 
3178                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3179                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
3180         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3181                                         &EXT4_SB(sb)->s_freeblocks_counter));
3182         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3183                                         &EXT4_SB(sb)->s_freeinodes_counter));
3184         sb->s_dirt = 0;
3185         BUFFER_TRACE(sbh, "marking dirty");
3186         mark_buffer_dirty(sbh);
3187         if (sync) {
3188                 error = sync_dirty_buffer(sbh);
3189                 if (error)
3190                         return error;
3191
3192                 error = buffer_write_io_error(sbh);
3193                 if (error) {
3194                         printk(KERN_ERR "EXT4-fs: I/O error while writing "
3195                                "superblock for %s.\n", sb->s_id);
3196                         clear_buffer_write_io_error(sbh);
3197                         set_buffer_uptodate(sbh);
3198                 }
3199         }
3200         return error;
3201 }
3202
3203
3204 /*
3205  * Have we just finished recovery?  If so, and if we are mounting (or
3206  * remounting) the filesystem readonly, then we will end up with a
3207  * consistent fs on disk.  Record that fact.
3208  */
3209 static void ext4_mark_recovery_complete(struct super_block *sb,
3210                                         struct ext4_super_block *es)
3211 {
3212         journal_t *journal = EXT4_SB(sb)->s_journal;
3213
3214         if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3215                 BUG_ON(journal != NULL);
3216                 return;
3217         }
3218         jbd2_journal_lock_updates(journal);
3219         if (jbd2_journal_flush(journal) < 0)
3220                 goto out;
3221
3222         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3223             sb->s_flags & MS_RDONLY) {
3224                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3225                 ext4_commit_super(sb, 1);
3226         }
3227
3228 out:
3229         jbd2_journal_unlock_updates(journal);
3230 }
3231
3232 /*
3233  * If we are mounting (or read-write remounting) a filesystem whose journal
3234  * has recorded an error from a previous lifetime, move that error to the
3235  * main filesystem now.
3236  */
3237 static void ext4_clear_journal_err(struct super_block *sb,
3238                                    struct ext4_super_block *es)
3239 {
3240         journal_t *journal;
3241         int j_errno;
3242         const char *errstr;
3243
3244         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3245
3246         journal = EXT4_SB(sb)->s_journal;
3247
3248         /*
3249          * Now check for any error status which may have been recorded in the
3250          * journal by a prior ext4_error() or ext4_abort()
3251          */
3252
3253         j_errno = jbd2_journal_errno(journal);
3254         if (j_errno) {
3255                 char nbuf[16];
3256
3257                 errstr = ext4_decode_error(sb, j_errno, nbuf);
3258                 ext4_warning(sb, __func__, "Filesystem error recorded "
3259                              "from previous mount: %s", errstr);
3260                 ext4_warning(sb, __func__, "Marking fs in need of "
3261                              "filesystem check.");
3262
3263                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3264                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3265                 ext4_commit_super(sb, 1);
3266
3267                 jbd2_journal_clear_err(journal);
3268         }
3269 }
3270
3271 /*
3272  * Force the running and committing transactions to commit,
3273  * and wait on the commit.
3274  */
3275 int ext4_force_commit(struct super_block *sb)
3276 {
3277         journal_t *journal;
3278         int ret = 0;
3279
3280         if (sb->s_flags & MS_RDONLY)
3281                 return 0;
3282
3283         journal = EXT4_SB(sb)->s_journal;
3284         if (journal)
3285                 ret = ext4_journal_force_commit(journal);
3286
3287         return ret;
3288 }
3289
/*
 * Write the superblock out synchronously (sync argument is 1).  Any
 * error returned by ext4_commit_super() is discarded here because this
 * function returns void.
 */
static void ext4_write_super(struct super_block *sb)
{
	ext4_commit_super(sb, 1);
}
3294
3295 static int ext4_sync_fs(struct super_block *sb, int wait)
3296 {
3297         int ret = 0;
3298         tid_t target;
3299
3300         trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3301         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3302                 if (wait)
3303                         jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3304         }
3305         return ret;
3306 }
3307
3308 /*
3309  * LVM calls this function before a (read-only) snapshot is created.  This
3310  * gives us a chance to flush the journal completely and mark the fs clean.
3311  */
3312 static int ext4_freeze(struct super_block *sb)
3313 {
3314         int error = 0;
3315         journal_t *journal;
3316
3317         if (sb->s_flags & MS_RDONLY)
3318                 return 0;
3319
3320         journal = EXT4_SB(sb)->s_journal;
3321
3322         /* Now we set up the journal barrier. */
3323         jbd2_journal_lock_updates(journal);
3324
3325         /*
3326          * Don't clear the needs_recovery flag if we failed to flush
3327          * the journal.
3328          */
3329         error = jbd2_journal_flush(journal);
3330         if (error < 0) {
3331         out:
3332                 jbd2_journal_unlock_updates(journal);
3333                 return error;
3334         }
3335
3336         /* Journal blocked and flushed, clear needs_recovery flag. */
3337         EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3338         error = ext4_commit_super(sb, 1);
3339         if (error)
3340                 goto out;
3341         return 0;
3342 }
3343
3344 /*
3345  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3346  * flag here, even though the filesystem is not technically dirty yet.
3347  */
3348 static int ext4_unfreeze(struct super_block *sb)
3349 {
3350         if (sb->s_flags & MS_RDONLY)
3351                 return 0;
3352
3353         lock_super(sb);
3354         /* Reset the needs_recovery flag before the fs is unlocked. */
3355         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3356         ext4_commit_super(sb, 1);
3357         unlock_super(sb);
3358         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3359         return 0;
3360 }
3361
/*
 * Apply a remount request to @sb.
 *
 * @flags points at the requested mount flags (only MS_RDONLY is examined
 * here) and @data is the option string handed to parse_options().
 *
 * The current option state is snapshotted first; every failure path
 * jumps to restore_opts, which puts that snapshot back and returns a
 * negative errno.  Returns 0 on success.
 */
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t n_blocks_count = 0;
	unsigned long old_sb_flags;
	struct ext4_mount_options old_opts;
	ext4_group_t g;
	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
	int err;
#ifdef CONFIG_QUOTA
	int i;
#endif

	/* Store the original options */
	old_sb_flags = sb->s_flags;
	old_opts.s_mount_opt = sbi->s_mount_opt;
	old_opts.s_resuid = sbi->s_resuid;
	old_opts.s_resgid = sbi->s_resgid;
	old_opts.s_commit_interval = sbi->s_commit_interval;
	old_opts.s_min_batch_time = sbi->s_min_batch_time;
	old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
	for (i = 0; i < MAXQUOTAS; i++)
		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
#endif
	/*
	 * Start from the journal thread's current I/O priority, if it has
	 * one, so that it is preserved unless the options override it.
	 */
	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
		journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;

	/*
	 * Allow the "check" option to be passed as a remount option.
	 */
	if (!parse_options(data, sb, NULL, &journal_ioprio,
			   &n_blocks_count, 1)) {
		err = -EINVAL;
		goto restore_opts;
	}

	if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
		ext4_abort(sb, __func__, "Abort forced by user");

	/* Keep MS_POSIXACL in step with the (possibly changed) acl option. */
	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);

	es = sbi->s_es;

	/* Push the new tunables and I/O priority into a live journal. */
	if (sbi->s_journal) {
		ext4_init_journal_params(sb, sbi->s_journal);
		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
	}

	/*
	 * The rest only matters when the read-only state changes or the
	 * options asked for a larger block count than the fs has now.
	 */
	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
		n_blocks_count > ext4_blocks_count(es)) {
		if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
			err = -EROFS;
			goto restore_opts;
		}

		if (*flags & MS_RDONLY) {
			/*
			 * First of all, the unconditional stuff we have to do
			 * to disable replay of the journal when we next remount
			 */
			sb->s_flags |= MS_RDONLY;

			/*
			 * OK, test if we are remounting a valid rw partition
			 * readonly, and if so set the rdonly flag and then
			 * mark the partition as valid again.
			 */
			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
			    (sbi->s_mount_state & EXT4_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);

			if (sbi->s_journal)
				ext4_mark_recovery_complete(sb, es);
		} else {
			int ret;
			/* Refuse read-write if unknown RO_COMPAT bits are set. */
			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
				       "remount RDWR because of unsupported "
				       "optional features (%x).\n", sb->s_id,
				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
					~EXT4_FEATURE_RO_COMPAT_SUPP));
				err = -EROFS;
				goto restore_opts;
			}

			/*
			 * Make sure the group descriptor checksums
			 * are sane.  If they aren't, refuse to
			 * remount r/w.
			 */
			for (g = 0; g < sbi->s_groups_count; g++) {
				struct ext4_group_desc *gdp =
					ext4_get_group_desc(sb, g, NULL);

				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
					printk(KERN_ERR
	       "EXT4-fs: ext4_remount: "
		"Checksum for group %u failed (%u!=%u)\n",
		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
					       le16_to_cpu(gdp->bg_checksum));
					err = -EINVAL;
					goto restore_opts;
				}
			}

			/*
			 * If we have an unprocessed orphan list hanging
			 * around from a previously readonly bdev mount,
			 * require a full umount/remount for now.
			 */
			if (es->s_last_orphan) {
				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
				       "remount RDWR because of unprocessed "
				       "orphan inode list.  Please "
				       "umount/remount instead.\n",
				       sb->s_id);
				err = -EINVAL;
				goto restore_opts;
			}

			/*
			 * Mounting a RDONLY partition read-write, so reread
			 * and store the current valid flag.  (It may have
			 * been changed by e2fsck since we originally mounted
			 * the partition.)
			 */
			if (sbi->s_journal)
				ext4_clear_journal_err(sb, es);
			sbi->s_mount_state = le16_to_cpu(es->s_state);
			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
				goto restore_opts;
			/* MS_RDONLY is dropped only if ext4_setup_super() returns 0. */
			if (!ext4_setup_super(sb, es, 0))
				sb->s_flags &= ~MS_RDONLY;
		}
	}
	/* Without a journal, write the superblock out directly. */
	if (sbi->s_journal == NULL)
		ext4_commit_super(sb, 1);

#ifdef CONFIG_QUOTA
	/* Release old quota file names */
	for (i = 0; i < MAXQUOTAS; i++)
		if (old_opts.s_qf_names[i] &&
		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
			kfree(old_opts.s_qf_names[i]);
#endif
	return 0;
restore_opts:
	/* Failure: put back everything that was snapshotted on entry. */
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.s_mount_opt;
	sbi->s_resuid = old_opts.s_resuid;
	sbi->s_resgid = old_opts.s_resgid;
	sbi->s_commit_interval = old_opts.s_commit_interval;
	sbi->s_min_batch_time = old_opts.s_min_batch_time;
	sbi->s_max_batch_time = old_opts.s_max_batch_time;
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	for (i = 0; i < MAXQUOTAS; i++) {
		/* Free a name that differs from the saved one, then restore. */
		if (sbi->s_qf_names[i] &&
		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
			kfree(sbi->s_qf_names[i]);
		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
	}
#endif
	return err;
}
3532
3533 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3534 {
3535         struct super_block *sb = dentry->d_sb;
3536         struct ext4_sb_info *sbi = EXT4_SB(sb);
3537         struct ext4_super_block *es = sbi->s_es;
3538         u64 fsid;
3539
3540         if (test_opt(sb, MINIX_DF)) {
3541                 sbi->s_overhead_last = 0;
3542         } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3543                 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3544                 ext4_fsblk_t overhead = 0;
3545
3546                 /*
3547                  * Compute the overhead (FS structures).  This is constant
3548                  * for a given filesystem unless the number of block groups
3549                  * changes so we cache the previous value until it does.
3550                  */
3551
3552                 /*
3553                  * All of the blocks before first_data_block are
3554                  * overhead
3555                  */
3556                 overhead = le32_to_cpu(es->s_first_data_block);
3557
3558                 /*
3559                  * Add the overhead attributed to the superblock and
3560                  * block group descriptors.  If the sparse superblocks
3561                  * feature is turned on, then not all groups have this.
3562                  */
3563                 for (i = 0; i < ngroups; i++) {
3564                         overhead += ext4_bg_has_super(sb, i) +
3565                                 ext4_bg_num_gdb(sb, i);
3566                         cond_resched();
3567                 }
3568
3569                 /*
3570                  * Every block group has an inode bitmap, a block
3571                  * bitmap, and an inode table.
3572                  */
3573                 overhead += ngroups * (2 + sbi->s_itb_per_group);
3574                 sbi->s_overhead_last = overhead;
3575                 smp_wmb();
3576                 sbi->s_blocks_last = ext4_blocks_count(es);
3577         }
3578
3579         buf->f_type = EXT4_SUPER_MAGIC;
3580         buf->f_bsize = sb->s_blocksize;
3581         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3582         buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3583                        percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3584         ext4_free_blocks_count_set(es, buf->f_bfree);
3585         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3586         if (buf->f_bfree < ext4_r_blocks_count(es))
3587                 buf->f_bavail = 0;
3588         buf->f_files = le32_to_cpu(es->s_inodes_count);
3589         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3590         es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3591         buf->f_namelen = EXT4_NAME_LEN;
3592         fsid = le64_to_cpup((void *)es->s_uuid) ^
3593                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3594         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3595         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3596         return 0;
3597 }
3598
/* Helper function for writing quotas on sync - we need to start a transaction
 * before the quota file is locked for write. Otherwise there are possible
 * deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                  write_dquot()
 *   vfs_dq_init()                         down(dqio_mutex)
 *     down(dqio_mutex)                    jbd2_journal_start()
 *
 */
3608
3609 #ifdef CONFIG_QUOTA
3610
/*
 * Return the entry of the superblock's quota files[] array selected by
 * the dquot's type, for the superblock the dquot belongs to.
 */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
}
3615
3616 static int ext4_write_dquot(struct dquot *dquot)
3617 {
3618         int ret, err;
3619         handle_t *handle;
3620         struct inode *inode;
3621
3622         inode = dquot_to_inode(dquot);
3623         handle = ext4_journal_start(inode,
3624                                         EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3625         if (IS_ERR(handle))
3626                 return PTR_ERR(handle);
3627         ret = dquot_commit(dquot);
3628         err = ext4_journal_stop(handle);
3629         if (!ret)
3630                 ret = err;
3631         return ret;
3632 }
3633
3634 static int ext4_acquire_dquot(struct dquot *dquot)
3635 {
3636         int ret, err;
3637         handle_t *handle;
3638
3639         handle = ext4_journal_start(dquot_to_inode(dquot),
3640                                         EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3641         if (IS_ERR(handle))
3642                 return PTR_ERR(handle);
3643         ret = dquot_acquire(dquot);
3644         err = ext4_journal_stop(handle);
3645         if (!ret)
3646                 ret = err;
3647         return ret;
3648 }
3649
3650 static int ext4_release_dquot(struct dquot *dquot)
3651 {
3652         int ret, err;
3653         handle_t *handle;
3654
3655         handle = ext4_journal_start(dquot_to_inode(dquot),
3656                                         EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3657         if (IS_ERR(handle)) {
3658                 /* Release dquot anyway to avoid endless cycle in dqput() */
3659                 dquot_release(dquot);
3660                 return PTR_ERR(handle);
3661         }
3662         ret = dquot_release(dquot);
3663         err = ext4_journal_stop(handle);
3664         if (!ret)
3665                 ret = err;
3666         return ret;
3667 }
3668
3669 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3670 {
3671         /* Are we journaling quotas? */
3672         if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3673             EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3674                 dquot_mark_dquot_dirty(dquot);
3675                 return ext4_write_dquot(dquot);
3676         } else {
3677                 return dquot_mark_dquot_dirty(dquot);
3678         }
3679 }
3680
3681 static int ext4_write_info(struct super_block *sb, int type)
3682 {
3683         int ret, err;
3684         handle_t *handle;
3685
3686         /* Data block + inode block */
3687         handle = ext4_journal_start(sb->s_root->d_inode, 2);
3688         if (IS_ERR(handle))
3689                 return PTR_ERR(handle);
3690         ret = dquot_commit_info(sb, type);
3691         err = ext4_journal_stop(handle);
3692         if (!ret)
3693                 ret = err;
3694         return ret;
3695 }
3696
3697 /*
3698  * Turn on quotas during mount time - we need to find
3699  * the quota file and such...
3700  */
3701 static int ext4_quota_on_mount(struct super_block *sb, int type)
3702 {
3703         return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3704                         EXT4_SB(sb)->s_jquota_fmt, type);
3705 }
3706
3707 /*
3708  * Standard function to be called on quota_on
3709  */
3710 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3711                          char *name, int remount)
3712 {
3713         int err;
3714         struct path path;
3715
3716         if (!test_opt(sb, QUOTA))
3717                 return -EINVAL;
3718         /* When remounting, no checks are needed and in fact, name is NULL */
3719         if (remount)
3720                 return vfs_quota_on(sb, type, format_id, name, remount);
3721
3722         err = kern_path(name, LOOKUP_FOLLOW, &path);
3723         if (err)
3724                 return err;
3725
3726         /* Quotafile not on the same filesystem? */
3727         if (path.mnt->mnt_sb != sb) {
3728                 path_put(&path);
3729                 return -EXDEV;
3730         }
3731         /* Journaling quota? */
3732         if (EXT4_SB(sb)->s_qf_names[type]) {
3733                 /* Quotafile not in fs root? */
3734                 if (path.dentry->d_parent != sb->s_root)
3735                         printk(KERN_WARNING
3736                                 "EXT4-fs: Quota file not on filesystem root. "
3737                                 "Journaled quota will not work.\n");
3738         }
3739
3740         /*
3741          * When we journal data on quota file, we have to flush journal to see
3742          * all updates to the file when we bypass pagecache...
3743          */
3744         if (EXT4_SB(sb)->s_journal &&
3745             ext4_should_journal_data(path.dentry->d_inode)) {
3746                 /*
3747                  * We don't need to lock updates but journal_flush() could
3748                  * otherwise be livelocked...
3749                  */
3750                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3751                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3752                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3753                 if (err) {
3754                         path_put(&path);
3755                         return err;
3756                 }
3757         }
3758
3759         err = vfs_quota_on_path(sb, type, format_id, &path);
3760         path_put(&path);
3761         return err;
3762 }
3763
3764 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3765  * acquiring the locks... As quota files are never truncated and quota code
3766  * itself serializes the operations (and noone else should touch the files)
3767  * we don't have to be afraid of races */
3768 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3769                                size_t len, loff_t off)
3770 {
3771         struct inode *inode = sb_dqopt(sb)->files[type];
3772         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3773         int err = 0;
3774         int offset = off & (sb->s_blocksize - 1);
3775         int tocopy;
3776         size_t toread;
3777         struct buffer_head *bh;
3778         loff_t i_size = i_size_read(inode);
3779
3780         if (off > i_size)
3781                 return 0;
3782         if (off+len > i_size)
3783                 len = i_size-off;
3784         toread = len;
3785         while (toread > 0) {
3786                 tocopy = sb->s_blocksize - offset < toread ?
3787                                 sb->s_blocksize - offset : toread;
3788                 bh = ext4_bread(NULL, inode, blk, 0, &err);
3789                 if (err)
3790                         return err;
3791                 if (!bh)        /* A hole? */
3792                         memset(data, 0, tocopy);
3793                 else
3794                         memcpy(data, bh->b_data+offset, tocopy);
3795                 brelse(bh);
3796                 offset = 0;
3797                 toread -= tocopy;
3798                 data += tocopy;
3799                 blk++;
3800         }
3801         return len;
3802 }
3803
3804 /* Write to quotafile (we know the transaction is already started and has
3805  * enough credits) */
3806 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3807                                 const char *data, size_t len, loff_t off)
3808 {
3809         struct inode *inode = sb_dqopt(sb)->files[type];
3810         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3811         int err = 0;
3812         int offset = off & (sb->s_blocksize - 1);
3813         int tocopy;
3814         int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3815         size_t towrite = len;
3816         struct buffer_head *bh;
3817         handle_t *handle = journal_current_handle();
3818
3819         if (EXT4_SB(sb)->s_journal && !handle) {
3820                 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3821                         " cancelled because transaction is not started.\n",
3822                         (unsigned long long)off, (unsigned long long)len);
3823                 return -EIO;
3824         }
3825         mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3826         while (towrite > 0) {
3827                 tocopy = sb->s_blocksize - offset < towrite ?
3828                                 sb->s_blocksize - offset : towrite;
3829                 bh = ext4_bread(handle, inode, blk, 1, &err);
3830                 if (!bh)
3831                         goto out;
3832                 if (journal_quota) {
3833                         err = ext4_journal_get_write_access(handle, bh);
3834                         if (err) {
3835                                 brelse(bh);
3836                                 goto out;
3837                         }
3838                 }
3839                 lock_buffer(bh);
3840                 memcpy(bh->b_data+offset, data, tocopy);
3841                 flush_dcache_page(bh->b_page);
3842                 unlock_buffer(bh);
3843                 if (journal_quota)
3844                         err = ext4_handle_dirty_metadata(handle, NULL, bh);
3845                 else {
3846                         /* Always do at least ordered writes for quotas */
3847                         err = ext4_jbd2_file_inode(handle, inode);
3848                         mark_buffer_dirty(bh);
3849                 }
3850                 brelse(bh);
3851                 if (err)
3852                         goto out;
3853                 offset = 0;
3854                 towrite -= tocopy;
3855                 data += tocopy;
3856                 blk++;
3857         }
3858 out:
3859         if (len == towrite) {
3860                 mutex_unlock(&inode->i_mutex);
3861                 return err;
3862         }
3863         if (inode->i_size < off+len-towrite) {
3864                 i_size_write(inode, off+len-towrite);
3865                 EXT4_I(inode)->i_disksize = inode->i_size;
3866         }
3867         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3868         ext4_mark_inode_dirty(handle, inode);
3869         mutex_unlock(&inode->i_mutex);
3870         return len - towrite;
3871 }
3872
3873 #endif
3874
3875 static int ext4_get_sb(struct file_system_type *fs_type,
3876         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3877 {
3878         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3879 }
3880
3881 static struct file_system_type ext4_fs_type = {
3882         .owner          = THIS_MODULE,
3883         .name           = "ext4",
3884         .get_sb         = ext4_get_sb,
3885         .kill_sb        = kill_block_super,
3886         .fs_flags       = FS_REQUIRES_DEV,
3887 };
3888
3889 #ifdef CONFIG_EXT4DEV_COMPAT
3890 static int ext4dev_get_sb(struct file_system_type *fs_type,
3891         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3892 {
3893         printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3894                "to mount using ext4\n");
3895         printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3896                "will go away by 2.6.31\n");
3897         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3898 }
3899
3900 static struct file_system_type ext4dev_fs_type = {
3901         .owner          = THIS_MODULE,
3902         .name           = "ext4dev",
3903         .get_sb         = ext4dev_get_sb,
3904         .kill_sb        = kill_block_super,
3905         .fs_flags       = FS_REQUIRES_DEV,
3906 };
3907 MODULE_ALIAS("ext4dev");
3908 #endif
3909
3910 static int __init init_ext4_fs(void)
3911 {
3912         int err;
3913
3914         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3915         if (!ext4_kset)
3916                 return -ENOMEM;
3917         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3918         err = init_ext4_mballoc();
3919         if (err)
3920                 return err;
3921
3922         err = init_ext4_xattr();
3923         if (err)
3924                 goto out2;
3925         err = init_inodecache();
3926         if (err)
3927                 goto out1;
3928         err = register_filesystem(&ext4_fs_type);
3929         if (err)
3930                 goto out;
3931 #ifdef CONFIG_EXT4DEV_COMPAT
3932         err = register_filesystem(&ext4dev_fs_type);
3933         if (err) {
3934                 unregister_filesystem(&ext4_fs_type);
3935                 goto out;
3936         }
3937 #endif
3938         return 0;
3939 out:
3940         destroy_inodecache();
3941 out1:
3942         exit_ext4_xattr();
3943 out2:
3944         exit_ext4_mballoc();
3945         return err;
3946 }
3947
3948 static void __exit exit_ext4_fs(void)
3949 {
3950         unregister_filesystem(&ext4_fs_type);
3951 #ifdef CONFIG_EXT4DEV_COMPAT
3952         unregister_filesystem(&ext4dev_fs_type);
3953 #endif
3954         destroy_inodecache();
3955         exit_ext4_xattr();
3956         exit_ext4_mballoc();
3957         remove_proc_entry("fs/ext4", NULL);
3958         kset_unregister(ext4_kset);
3959 }
3960
3961 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3962 MODULE_DESCRIPTION("Fourth Extended Filesystem");
3963 MODULE_LICENSE("GPL");
3964 module_init(init_ext4_fs)
3965 module_exit(exit_ext4_fs)