git.oblomov.eu Git - linux-2.6/blob - fs/ext4/super.c

   1 /*
   2  *  linux/fs/ext4/super.c
   3  *
   4  * Copyright (C) 1992, 1993, 1994, 1995
   5  * Remy Card (card@masi.ibp.fr)
   6  * Laboratoire MASI - Institut Blaise Pascal
   7  * Universite Pierre et Marie Curie (Paris VI)
   8  *
   9  *  from
  10  *
  11  *  linux/fs/minix/inode.c
  12  *
  13  *  Copyright (C) 1991, 1992  Linus Torvalds
  14  *
  15  *  Big-endian to little-endian byte-swapping/bitmaps by
  16  *        David S. Miller (davem@caip.rutgers.edu), 1995
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/string.h>
  21 #include <linux/fs.h>
  22 #include <linux/time.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/jbd2.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/blkdev.h>
  28 #include <linux/parser.h>
  29 #include <linux/smp_lock.h>
  30 #include <linux/buffer_head.h>
  31 #include <linux/exportfs.h>
  32 #include <linux/vfs.h>
  33 #include <linux/random.h>
  34 #include <linux/mount.h>
  35 #include <linux/namei.h>
  36 #include <linux/quotaops.h>
  37 #include <linux/seq_file.h>
  38 #include <linux/proc_fs.h>
  39 #include <linux/ctype.h>
  40 #include <linux/marker.h>
  41 #include <linux/log2.h>
  42 #include <linux/crc16.h>
  43 #include <asm/uaccess.h>
  44
  45 #include "ext4.h"
  46 #include "ext4_jbd2.h"
  47 #include "xattr.h"
  48 #include "acl.h"
  49
  50 struct proc_dir_entry *ext4_proc_root;
  51 static struct kset *ext4_kset;
  52
  53 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  54                              unsigned long journal_devnum);
  55 static int ext4_commit_super(struct super_block *sb, int sync);
  56 static void ext4_mark_recovery_complete(struct super_block *sb,
  57                                         struct ext4_super_block *es);
  58 static void ext4_clear_journal_err(struct super_block *sb,
  59                                    struct ext4_super_block *es);
  60 static int ext4_sync_fs(struct super_block *sb, int wait);
  61 static const char *ext4_decode_error(struct super_block *sb, int errno,
  62                                      char nbuf[16]);
  63 static int ext4_remount(struct super_block *sb, int *flags, char *data);
  64 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  65 static int ext4_unfreeze(struct super_block *sb);
  66 static void ext4_write_super(struct super_block *sb);
  67 static int ext4_freeze(struct super_block *sb);
  68
  69
  70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
  71                                struct ext4_group_desc *bg)
  72 {
  73         return le32_to_cpu(bg->bg_block_bitmap_lo) |
  74                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  75                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
  76 }
  77
  78 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
  79                                struct ext4_group_desc *bg)
  80 {
  81         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
  82                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  83                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
  84 }
  85
  86 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
  87                               struct ext4_group_desc *bg)
  88 {
  89         return le32_to_cpu(bg->bg_inode_table_lo) |
  90                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  91                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  92 }
  93
  94 __u32 ext4_free_blks_count(struct super_block *sb,
  95                               struct ext4_group_desc *bg)
  96 {
  97         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
  98                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  99                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 100 }
 101
 102 __u32 ext4_free_inodes_count(struct super_block *sb,
 103                               struct ext4_group_desc *bg)
 104 {
 105         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 106                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 107                 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 108 }
 109
 110 __u32 ext4_used_dirs_count(struct super_block *sb,
 111                               struct ext4_group_desc *bg)
 112 {
 113         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 114                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 115                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 116 }
 117
 118 __u32 ext4_itable_unused_count(struct super_block *sb,
 119                               struct ext4_group_desc *bg)
 120 {
 121         return le16_to_cpu(bg->bg_itable_unused_lo) |
 122                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 123                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 124 }
 125
 126 void ext4_block_bitmap_set(struct super_block *sb,
 127                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 128 {
 129         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 130         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 131                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 132 }
 133
 134 void ext4_inode_bitmap_set(struct super_block *sb,
 135                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 136 {
 137         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 138         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 139                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 140 }
 141
 142 void ext4_inode_table_set(struct super_block *sb,
 143                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 144 {
 145         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 146         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 147                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 148 }
 149
 150 void ext4_free_blks_set(struct super_block *sb,
 151                           struct ext4_group_desc *bg, __u32 count)
 152 {
 153         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 154         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 155                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 156 }
 157
 158 void ext4_free_inodes_set(struct super_block *sb,
 159                           struct ext4_group_desc *bg, __u32 count)
 160 {
 161         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 162         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 163                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 164 }
 165
 166 void ext4_used_dirs_set(struct super_block *sb,
 167                           struct ext4_group_desc *bg, __u32 count)
 168 {
 169         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 170         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 171                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 172 }
 173
 174 void ext4_itable_unused_set(struct super_block *sb,
 175                           struct ext4_group_desc *bg, __u32 count)
 176 {
 177         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 178         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 179                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 180 }
 181
 182 /*
 183  * Wrappers for jbd2_journal_start/end.
 184  *
 185  * The only special thing we need to do here is to make sure that all
 186  * journal_end calls result in the superblock being marked dirty, so
 187  * that sync() will call the filesystem's write_super callback if
 188  * appropriate.
 189  */
 190 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 191 {
 192         journal_t *journal;
 193
 194         if (sb->s_flags & MS_RDONLY)
 195                 return ERR_PTR(-EROFS);
 196
 197         /* Special case here: if the journal has aborted behind our
 198          * backs (eg. EIO in the commit thread), then we still need to
 199          * take the FS itself readonly cleanly. */
 200         journal = EXT4_SB(sb)->s_journal;
 201         if (journal) {
 202                 if (is_journal_aborted(journal)) {
 203                         ext4_abort(sb, __func__,
 204                                    "Detected aborted journal");
 205                         return ERR_PTR(-EROFS);
 206                 }
 207                 return jbd2_journal_start(journal, nblocks);
 208         }
 209         /*
 210          * We're not journaling, return the appropriate indication.
 211          */
 212         current->journal_info = EXT4_NOJOURNAL_HANDLE;
 213         return current->journal_info;
 214 }
 215
 216 /*
 217  * The only special thing we need to do here is to make sure that all
 218  * jbd2_journal_stop calls result in the superblock being marked dirty, so
 219  * that sync() will call the filesystem's write_super callback if
 220  * appropriate.
 221  */
 222 int __ext4_journal_stop(const char *where, handle_t *handle)
 223 {
 224         struct super_block *sb;
 225         int err;
 226         int rc;
 227
 228         if (!ext4_handle_valid(handle)) {
 229                 /*
 230                  * Do this here since we don't call jbd2_journal_stop() in
 231                  * no-journal mode.
 232                  */
 233                 current->journal_info = NULL;
 234                 return 0;
 235         }
 236         sb = handle->h_transaction->t_journal->j_private;
 237         err = handle->h_err;
 238         rc = jbd2_journal_stop(handle);
 239
 240         if (!err)
 241                 err = rc;
 242         if (err)
 243                 __ext4_std_error(sb, where, err);
 244         return err;
 245 }
 246
 247 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 248                 struct buffer_head *bh, handle_t *handle, int err)
 249 {
 250         char nbuf[16];
 251         const char *errstr = ext4_decode_error(NULL, err, nbuf);
 252
 253         BUG_ON(!ext4_handle_valid(handle));
 254
 255         if (bh)
 256                 BUFFER_TRACE(bh, "abort");
 257
 258         if (!handle->h_err)
 259                 handle->h_err = err;
 260
 261         if (is_handle_aborted(handle))
 262                 return;
 263
 264         printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
 265                caller, errstr, err_fn);
 266
 267         jbd2_journal_abort_handle(handle);
 268 }
 269
 270 /* Deal with the reporting of failure conditions on a filesystem such as
 271  * inconsistencies detected or read IO failures.
 272  *
 273  * On ext2, we can store the error state of the filesystem in the
 274  * superblock.  That is not possible on ext4, because we may have other
 275  * write ordering constraints on the superblock which prevent us from
 276  * writing it out straight away; and given that the journal is about to
 277  * be aborted, we can't rely on the current, or future, transactions to
 278  * write out the superblock safely.
 279  *
 280  * We'll just use the jbd2_journal_abort() error code to record an error in
 281  * the journal instead.  On recovery, the journal will compain about
 282  * that error until we've noted it down and cleared it.
 283  */
 284
 285 static void ext4_handle_error(struct super_block *sb)
 286 {
 287         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 288
 289         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 290         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 291
 292         if (sb->s_flags & MS_RDONLY)
 293                 return;
 294
 295         if (!test_opt(sb, ERRORS_CONT)) {
 296                 journal_t *journal = EXT4_SB(sb)->s_journal;
 297
 298                 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 299                 if (journal)
 300                         jbd2_journal_abort(journal, -EIO);
 301         }
 302         if (test_opt(sb, ERRORS_RO)) {
 303                 printk(KERN_CRIT "Remounting filesystem read-only\n");
 304                 sb->s_flags |= MS_RDONLY;
 305         }
 306         ext4_commit_super(sb, 1);
 307         if (test_opt(sb, ERRORS_PANIC))
 308                 panic("EXT4-fs (device %s): panic forced after error\n",
 309                         sb->s_id);
 310 }
 311
 312 void ext4_error(struct super_block *sb, const char *function,
 313                 const char *fmt, ...)
 314 {
 315         va_list args;
 316
 317         va_start(args, fmt);
 318         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 319         vprintk(fmt, args);
 320         printk("\n");
 321         va_end(args);
 322
 323         ext4_handle_error(sb);
 324 }
 325
 326 static const char *ext4_decode_error(struct super_block *sb, int errno,
 327                                      char nbuf[16])
 328 {
 329         char *errstr = NULL;
 330
 331         switch (errno) {
 332         case -EIO:
 333                 errstr = "IO failure";
 334                 break;
 335         case -ENOMEM:
 336                 errstr = "Out of memory";
 337                 break;
 338         case -EROFS:
 339                 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
 340                         errstr = "Journal has aborted";
 341                 else
 342                         errstr = "Readonly filesystem";
 343                 break;
 344         default:
 345                 /* If the caller passed in an extra buffer for unknown
 346                  * errors, textualise them now.  Else we just return
 347                  * NULL. */
 348                 if (nbuf) {
 349                         /* Check for truncated error codes... */
 350                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 351                                 errstr = nbuf;
 352                 }
 353                 break;
 354         }
 355
 356         return errstr;
 357 }
 358
 359 /* __ext4_std_error decodes expected errors from journaling functions
 360  * automatically and invokes the appropriate error response.  */
 361
 362 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
 363 {
 364         char nbuf[16];
 365         const char *errstr;
 366
 367         /* Special case: if the error is EROFS, and we're not already
 368          * inside a transaction, then there's really no point in logging
 369          * an error. */
 370         if (errno == -EROFS && journal_current_handle() == NULL &&
 371             (sb->s_flags & MS_RDONLY))
 372                 return;
 373
 374         errstr = ext4_decode_error(sb, errno, nbuf);
 375         printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
 376                sb->s_id, function, errstr);
 377
 378         ext4_handle_error(sb);
 379 }
 380
 381 /*
 382  * ext4_abort is a much stronger failure handler than ext4_error.  The
 383  * abort function may be used to deal with unrecoverable failures such
 384  * as journal IO errors or ENOMEM at a critical moment in log management.
 385  *
 386  * We unconditionally force the filesystem into an ABORT|READONLY state,
 387  * unless the error response on the fs has been set to panic in which
 388  * case we take the easy way out and panic immediately.
 389  */
 390
 391 void ext4_abort(struct super_block *sb, const char *function,
 392                 const char *fmt, ...)
 393 {
 394         va_list args;
 395
 396         printk(KERN_CRIT "ext4_abort called.\n");
 397
 398         va_start(args, fmt);
 399         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 400         vprintk(fmt, args);
 401         printk("\n");
 402         va_end(args);
 403
 404         if (test_opt(sb, ERRORS_PANIC))
 405                 panic("EXT4-fs panic from previous error\n");
 406
 407         if (sb->s_flags & MS_RDONLY)
 408                 return;
 409
 410         printk(KERN_CRIT "Remounting filesystem read-only\n");
 411         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 412         sb->s_flags |= MS_RDONLY;
 413         EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 414         if (EXT4_SB(sb)->s_journal)
 415                 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 416 }
 417
 418 void ext4_warning(struct super_block *sb, const char *function,
 419                   const char *fmt, ...)
 420 {
 421         va_list args;
 422
 423         va_start(args, fmt);
 424         printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
 425                sb->s_id, function);
 426         vprintk(fmt, args);
 427         printk("\n");
 428         va_end(args);
 429 }
 430
 431 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
 432                                 const char *function, const char *fmt, ...)
 433 __releases(bitlock)
 434 __acquires(bitlock)
 435 {
 436         va_list args;
 437         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 438
 439         va_start(args, fmt);
 440         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 441         vprintk(fmt, args);
 442         printk("\n");
 443         va_end(args);
 444
 445         if (test_opt(sb, ERRORS_CONT)) {
 446                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 447                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 448                 ext4_commit_super(sb, 0);
 449                 return;
 450         }
 451         ext4_unlock_group(sb, grp);
 452         ext4_handle_error(sb);
 453         /*
 454          * We only get here in the ERRORS_RO case; relocking the group
 455          * may be dangerous, but nothing bad will happen since the
 456          * filesystem will have already been marked read/only and the
 457          * journal has been aborted.  We return 1 as a hint to callers
 458          * who might what to use the return value from
 459          * ext4_grp_locked_error() to distinguish beween the
 460          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 461          * aggressively from the ext4 function in question, with a
 462          * more appropriate error code.
 463          */
 464         ext4_lock_group(sb, grp);
 465         return;
 466 }
 467
 468
 469 void ext4_update_dynamic_rev(struct super_block *sb)
 470 {
 471         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 472
 473         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 474                 return;
 475
 476         ext4_warning(sb, __func__,
 477                      "updating to rev %d because of new feature flag, "
 478                      "running e2fsck is recommended",
 479                      EXT4_DYNAMIC_REV);
 480
 481         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 482         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 483         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 484         /* leave es->s_feature_*compat flags alone */
 485         /* es->s_uuid will be set by e2fsck if empty */
 486
 487         /*
 488          * The rest of the superblock fields should be zero, and if not it
 489          * means they are likely already in use, so leave them alone.  We
 490          * can leave it up to e2fsck to clean up any inconsistencies there.
 491          */
 492 }
 493
 494 /*
 495  * Open the external journal device
 496  */
 497 static struct block_device *ext4_blkdev_get(dev_t dev)
 498 {
 499         struct block_device *bdev;
 500         char b[BDEVNAME_SIZE];
 501
 502         bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 503         if (IS_ERR(bdev))
 504                 goto fail;
 505         return bdev;
 506
 507 fail:
 508         printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
 509                         __bdevname(dev, b), PTR_ERR(bdev));
 510         return NULL;
 511 }
 512
 513 /*
 514  * Release the journal device
 515  */
 516 static int ext4_blkdev_put(struct block_device *bdev)
 517 {
 518         bd_release(bdev);
 519         return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 520 }
 521
 522 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 523 {
 524         struct block_device *bdev;
 525         int ret = -ENODEV;
 526
 527         bdev = sbi->journal_bdev;
 528         if (bdev) {
 529                 ret = ext4_blkdev_put(bdev);
 530                 sbi->journal_bdev = NULL;
 531         }
 532         return ret;
 533 }
 534
 535 static inline struct inode *orphan_list_entry(struct list_head *l)
 536 {
 537         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 538 }
 539
 540 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 541 {
 542         struct list_head *l;
 543
 544         printk(KERN_ERR "sb orphan head is %d\n",
 545                le32_to_cpu(sbi->s_es->s_last_orphan));
 546
 547         printk(KERN_ERR "sb_info orphan list:\n");
 548         list_for_each(l, &sbi->s_orphan) {
 549                 struct inode *inode = orphan_list_entry(l);
 550                 printk(KERN_ERR "  "
 551                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 552                        inode->i_sb->s_id, inode->i_ino, inode,
 553                        inode->i_mode, inode->i_nlink,
 554                        NEXT_ORPHAN(inode));
 555         }
 556 }
 557
 558 static void ext4_put_super(struct super_block *sb)
 559 {
 560         struct ext4_sb_info *sbi = EXT4_SB(sb);
 561         struct ext4_super_block *es = sbi->s_es;
 562         int i, err;
 563
 564         ext4_mb_release(sb);
 565         ext4_ext_release(sb);
 566         ext4_xattr_put_super(sb);
 567         if (sbi->s_journal) {
 568                 err = jbd2_journal_destroy(sbi->s_journal);
 569                 sbi->s_journal = NULL;
 570                 if (err < 0)
 571                         ext4_abort(sb, __func__,
 572                                    "Couldn't clean up the journal");
 573         }
 574         if (!(sb->s_flags & MS_RDONLY)) {
 575                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 576                 es->s_state = cpu_to_le16(sbi->s_mount_state);
 577                 ext4_commit_super(sb, 1);
 578         }
 579         if (sbi->s_proc) {
 580                 remove_proc_entry(sb->s_id, ext4_proc_root);
 581         }
 582         kobject_del(&sbi->s_kobj);
 583
 584         for (i = 0; i < sbi->s_gdb_count; i++)
 585                 brelse(sbi->s_group_desc[i]);
 586         kfree(sbi->s_group_desc);
 587         if (is_vmalloc_addr(sbi->s_flex_groups))
 588                 vfree(sbi->s_flex_groups);
 589         else
 590                 kfree(sbi->s_flex_groups);
 591         percpu_counter_destroy(&sbi->s_freeblocks_counter);
 592         percpu_counter_destroy(&sbi->s_freeinodes_counter);
 593         percpu_counter_destroy(&sbi->s_dirs_counter);
 594         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 595         brelse(sbi->s_sbh);
 596 #ifdef CONFIG_QUOTA
 597         for (i = 0; i < MAXQUOTAS; i++)
 598                 kfree(sbi->s_qf_names[i]);
 599 #endif
 600
 601         /* Debugging code just in case the in-memory inode orphan list
 602          * isn't empty.  The on-disk one can be non-empty if we've
 603          * detected an error and taken the fs readonly, but the
 604          * in-memory list had better be clean by this point. */
 605         if (!list_empty(&sbi->s_orphan))
 606                 dump_orphan_list(sb, sbi);
 607         J_ASSERT(list_empty(&sbi->s_orphan));
 608
 609         invalidate_bdev(sb->s_bdev);
 610         if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
 611                 /*
 612                  * Invalidate the journal device's buffers.  We don't want them
 613                  * floating about in memory - the physical journal device may
 614                  * hotswapped, and it breaks the `ro-after' testing code.
 615                  */
 616                 sync_blockdev(sbi->journal_bdev);
 617                 invalidate_bdev(sbi->journal_bdev);
 618                 ext4_blkdev_remove(sbi);
 619         }
 620         sb->s_fs_info = NULL;
 621         /*
 622          * Now that we are completely done shutting down the
 623          * superblock, we need to actually destroy the kobject.
 624          */
 625         unlock_kernel();
 626         unlock_super(sb);
 627         kobject_put(&sbi->s_kobj);
 628         wait_for_completion(&sbi->s_kobj_unregister);
 629         lock_super(sb);
 630         lock_kernel();
 631         kfree(sbi->s_blockgroup_lock);
 632         kfree(sbi);
 633         return;
 634 }
 635
 636 static struct kmem_cache *ext4_inode_cachep;
 637
 638 /*
 639  * Called inside transaction, so use GFP_NOFS
 640  */
 641 static struct inode *ext4_alloc_inode(struct super_block *sb)
 642 {
 643         struct ext4_inode_info *ei;
 644
 645         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 646         if (!ei)
 647                 return NULL;
 648 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 649         ei->i_acl = EXT4_ACL_NOT_CACHED;
 650         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 651 #endif
 652         ei->vfs_inode.i_version = 1;
 653         ei->vfs_inode.i_data.writeback_index = 0;
 654         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 655         INIT_LIST_HEAD(&ei->i_prealloc_list);
 656         spin_lock_init(&ei->i_prealloc_lock);
 657         /*
 658          * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
 659          * therefore it can be null here.  Don't check it, just initialize
 660          * jinode.
 661          */
 662         jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 663         ei->i_reserved_data_blocks = 0;
 664         ei->i_reserved_meta_blocks = 0;
 665         ei->i_allocated_meta_blocks = 0;
 666         ei->i_delalloc_reserved_flag = 0;
 667         spin_lock_init(&(ei->i_block_reservation_lock));
 668         return &ei->vfs_inode;
 669 }
 670
 671 static void ext4_destroy_inode(struct inode *inode)
 672 {
 673         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 674                 printk("EXT4 Inode %p: orphan list check failed!\n",
 675                         EXT4_I(inode));
 676                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 677                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
 678                                 true);
 679                 dump_stack();
 680         }
 681         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 682 }
 683
 684 static void init_once(void *foo)
 685 {
 686         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 687
 688         INIT_LIST_HEAD(&ei->i_orphan);
 689 #ifdef CONFIG_EXT4_FS_XATTR
 690         init_rwsem(&ei->xattr_sem);
 691 #endif
 692         init_rwsem(&ei->i_data_sem);
 693         inode_init_once(&ei->vfs_inode);
 694 }
 695
 696 static int init_inodecache(void)
 697 {
 698         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 699                                              sizeof(struct ext4_inode_info),
 700                                              0, (SLAB_RECLAIM_ACCOUNT|
 701                                                 SLAB_MEM_SPREAD),
 702                                              init_once);
 703         if (ext4_inode_cachep == NULL)
 704                 return -ENOMEM;
 705         return 0;
 706 }
 707
 708 static void destroy_inodecache(void)
 709 {
 710         kmem_cache_destroy(ext4_inode_cachep);
 711 }
 712
 713 static void ext4_clear_inode(struct inode *inode)
 714 {
 715 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 716         if (EXT4_I(inode)->i_acl &&
 717                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
 718                 posix_acl_release(EXT4_I(inode)->i_acl);
 719                 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
 720         }
 721         if (EXT4_I(inode)->i_default_acl &&
 722                         EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
 723                 posix_acl_release(EXT4_I(inode)->i_default_acl);
 724                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
 725         }
 726 #endif
 727         ext4_discard_preallocations(inode);
 728         if (EXT4_JOURNAL(inode))
 729                 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
 730                                        &EXT4_I(inode)->jinode);
 731 }
 732
 733 static inline void ext4_show_quota_options(struct seq_file *seq,
 734                                            struct super_block *sb)
 735 {
 736 #if defined(CONFIG_QUOTA)
 737         struct ext4_sb_info *sbi = EXT4_SB(sb);
 738
 739         if (sbi->s_jquota_fmt)
 740                 seq_printf(seq, ",jqfmt=%s",
 741                 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
 742
 743         if (sbi->s_qf_names[USRQUOTA])
 744                 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 745
 746         if (sbi->s_qf_names[GRPQUOTA])
 747                 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 748
 749         if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
 750                 seq_puts(seq, ",usrquota");
 751
 752         if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
 753                 seq_puts(seq, ",grpquota");
 754 #endif
 755 }
 756
 757 /*
 758  * Show an option if
 759  *  - it's set to a non-default value OR
 760  *  - if the per-sb default is different from the global default
 761  */
 762 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 763 {
 764         int def_errors;
 765         unsigned long def_mount_opts;
 766         struct super_block *sb = vfs->mnt_sb;
 767         struct ext4_sb_info *sbi = EXT4_SB(sb);
 768         struct ext4_super_block *es = sbi->s_es;
 769
 770         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 771         def_errors     = le16_to_cpu(es->s_errors);
 772
 773         if (sbi->s_sb_block != 1)
 774                 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
 775         if (test_opt(sb, MINIX_DF))
 776                 seq_puts(seq, ",minixdf");
 777         if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
 778                 seq_puts(seq, ",grpid");
 779         if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
 780                 seq_puts(seq, ",nogrpid");
 781         if (sbi->s_resuid != EXT4_DEF_RESUID ||
 782             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
 783                 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
 784         }
 785         if (sbi->s_resgid != EXT4_DEF_RESGID ||
 786             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
 787                 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
 788         }
 789         if (test_opt(sb, ERRORS_RO)) {
 790                 if (def_errors == EXT4_ERRORS_PANIC ||
 791                     def_errors == EXT4_ERRORS_CONTINUE) {
 792                         seq_puts(seq, ",errors=remount-ro");
 793                 }
 794         }
 795         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
 796                 seq_puts(seq, ",errors=continue");
 797         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
 798                 seq_puts(seq, ",errors=panic");
 799         if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
 800                 seq_puts(seq, ",nouid32");
 801         if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
 802                 seq_puts(seq, ",debug");
 803         if (test_opt(sb, OLDALLOC))
 804                 seq_puts(seq, ",oldalloc");
 805 #ifdef CONFIG_EXT4_FS_XATTR
 806         if (test_opt(sb, XATTR_USER) &&
 807                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
 808                 seq_puts(seq, ",user_xattr");
 809         if (!test_opt(sb, XATTR_USER) &&
 810             (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
 811                 seq_puts(seq, ",nouser_xattr");
 812         }
 813 #endif
 814 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 815         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
 816                 seq_puts(seq, ",acl");
 817         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
 818                 seq_puts(seq, ",noacl");
 819 #endif
 820         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
 821                 seq_printf(seq, ",commit=%u",
 822                            (unsigned) (sbi->s_commit_interval / HZ));
 823         }
 824         if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
 825                 seq_printf(seq, ",min_batch_time=%u",
 826                            (unsigned) sbi->s_min_batch_time);
 827         }
 828         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
 829                 seq_printf(seq, ",max_batch_time=%u",
 830                            (unsigned) sbi->s_min_batch_time);
 831         }
 832
 833         /*
 834          * We're changing the default of barrier mount option, so
 835          * let's always display its mount state so it's clear what its
 836          * status is.
 837          */
 838         seq_puts(seq, ",barrier=");
 839         seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 840         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 841                 seq_puts(seq, ",journal_async_commit");
 842         if (test_opt(sb, NOBH))
 843                 seq_puts(seq, ",nobh");
 844         if (test_opt(sb, I_VERSION))
 845                 seq_puts(seq, ",i_version");
 846         if (!test_opt(sb, DELALLOC))
 847                 seq_puts(seq, ",nodelalloc");
 848
 849
 850         if (sbi->s_stripe)
 851                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 852         /*
 853          * journal mode get enabled in different ways
 854          * So just print the value even if we didn't specify it
 855          */
 856         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 857                 seq_puts(seq, ",data=journal");
 858         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 859                 seq_puts(seq, ",data=ordered");
 860         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 861                 seq_puts(seq, ",data=writeback");
 862
 863         if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
 864                 seq_printf(seq, ",inode_readahead_blks=%u",
 865                            sbi->s_inode_readahead_blks);
 866
 867         if (test_opt(sb, DATA_ERR_ABORT))
 868                 seq_puts(seq, ",data_err=abort");
 869
 870         if (test_opt(sb, NO_AUTO_DA_ALLOC))
 871                 seq_puts(seq, ",noauto_da_alloc");
 872
 873         ext4_show_quota_options(seq, sb);
 874         return 0;
 875 }
 876
 877
 878 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 879                 u64 ino, u32 generation)
 880 {
 881         struct inode *inode;
 882
 883         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 884                 return ERR_PTR(-ESTALE);
 885         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 886                 return ERR_PTR(-ESTALE);
 887
 888         /* iget isn't really right if the inode is currently unallocated!!
 889          *
 890          * ext4_read_inode will return a bad_inode if the inode had been
 891          * deleted, so we should be safe.
 892          *
 893          * Currently we don't know the generation for parent directory, so
 894          * a generation of 0 means "accept any"
 895          */
 896         inode = ext4_iget(sb, ino);
 897         if (IS_ERR(inode))
 898                 return ERR_CAST(inode);
 899         if (generation && inode->i_generation != generation) {
 900                 iput(inode);
 901                 return ERR_PTR(-ESTALE);
 902         }
 903
 904         return inode;
 905 }
 906
 907 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
 908                 int fh_len, int fh_type)
 909 {
 910         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 911                                     ext4_nfs_get_inode);
 912 }
 913
 914 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 915                 int fh_len, int fh_type)
 916 {
 917         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 918                                     ext4_nfs_get_inode);
 919 }
 920
 921 /*
 922  * Try to release metadata pages (indirect blocks, directories) which are
 923  * mapped via the block device.  Since these pages could have journal heads
 924  * which would prevent try_to_free_buffers() from freeing them, we must use
 925  * jbd2 layer's try_to_free_buffers() function to release them.
 926  */
 927 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
 928 {
 929         journal_t *journal = EXT4_SB(sb)->s_journal;
 930
 931         WARN_ON(PageChecked(page));
 932         if (!page_has_buffers(page))
 933                 return 0;
 934         if (journal)
 935                 return jbd2_journal_try_to_free_buffers(journal, page,
 936                                                         wait & ~__GFP_WAIT);
 937         return try_to_free_buffers(page);
 938 }
 939
 940 #ifdef CONFIG_QUOTA
 941 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
 942 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 943
     /*
      * Forward declarations of the ext4 quota callbacks.  They are needed
      * here because the operation tables that follow (ext4_quota_operations,
      * ext4_qctl_operations and the quota_read/quota_write slots of the
      * super_operations tables) take their addresses.
      */
 944 static int ext4_write_dquot(struct dquot *dquot);
 945 static int ext4_acquire_dquot(struct dquot *dquot);
 946 static int ext4_release_dquot(struct dquot *dquot);
 947 static int ext4_mark_dquot_dirty(struct dquot *dquot);
 948 static int ext4_write_info(struct super_block *sb, int type);
 949 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 950                                 char *path, int remount);
 951 static int ext4_quota_on_mount(struct super_block *sb, int type);
 952 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 953                                size_t len, loff_t off);
 954 static ssize_t ext4_quota_write(struct super_block *sb, int type,
 955                                 const char *data, size_t len, loff_t off);
 956
 957 static struct dquot_operations ext4_quota_operations = {
 958         .initialize     = dquot_initialize,
 959         .drop           = dquot_drop,
 960         .alloc_space    = dquot_alloc_space,
 961         .reserve_space  = dquot_reserve_space,
 962         .claim_space    = dquot_claim_space,
 963         .release_rsv    = dquot_release_reserved_space,
 964         .get_reserved_space = ext4_get_reserved_space,
 965         .alloc_inode    = dquot_alloc_inode,
 966         .free_space     = dquot_free_space,
 967         .free_inode     = dquot_free_inode,
 968         .transfer       = dquot_transfer,
 969         .write_dquot    = ext4_write_dquot,
 970         .acquire_dquot  = ext4_acquire_dquot,
 971         .release_dquot  = ext4_release_dquot,
 972         .mark_dirty     = ext4_mark_dquot_dirty,
 973         .write_info     = ext4_write_info,
 974         .alloc_dquot    = dquot_alloc,
 975         .destroy_dquot  = dquot_destroy,
 976 };
 977
 978 static struct quotactl_ops ext4_qctl_operations = {
 979         .quota_on       = ext4_quota_on,
 980         .quota_off      = vfs_quota_off,
 981         .quota_sync     = vfs_quota_sync,
 982         .get_info       = vfs_get_dqinfo,
 983         .set_info       = vfs_set_dqinfo,
 984         .get_dqblk      = vfs_get_dqblk,
 985         .set_dqblk      = vfs_set_dqblk
 986 };
 987 #endif
 988
 989 static const struct super_operations ext4_sops = {
 990         .alloc_inode    = ext4_alloc_inode,
 991         .destroy_inode  = ext4_destroy_inode,
 992         .write_inode    = ext4_write_inode,
 993         .dirty_inode    = ext4_dirty_inode,
 994         .delete_inode   = ext4_delete_inode,
 995         .put_super      = ext4_put_super,
 996         .sync_fs        = ext4_sync_fs,
 997         .freeze_fs      = ext4_freeze,
 998         .unfreeze_fs    = ext4_unfreeze,
 999         .statfs         = ext4_statfs,
1000         .remount_fs     = ext4_remount,
1001         .clear_inode    = ext4_clear_inode,
1002         .show_options   = ext4_show_options,
1003 #ifdef CONFIG_QUOTA
1004         .quota_read     = ext4_quota_read,
1005         .quota_write    = ext4_quota_write,
1006 #endif
1007         .bdev_try_to_free_page = bdev_try_to_free_page,
1008 };
1009
1010 static const struct super_operations ext4_nojournal_sops = {
1011         .alloc_inode    = ext4_alloc_inode,
1012         .destroy_inode  = ext4_destroy_inode,
1013         .write_inode    = ext4_write_inode,
1014         .dirty_inode    = ext4_dirty_inode,
1015         .delete_inode   = ext4_delete_inode,
1016         .write_super    = ext4_write_super,
1017         .put_super      = ext4_put_super,
1018         .statfs         = ext4_statfs,
1019         .remount_fs     = ext4_remount,
1020         .clear_inode    = ext4_clear_inode,
1021         .show_options   = ext4_show_options,
1022 #ifdef CONFIG_QUOTA
1023         .quota_read     = ext4_quota_read,
1024         .quota_write    = ext4_quota_write,
1025 #endif
1026         .bdev_try_to_free_page = bdev_try_to_free_page,
1027 };
1028
1029 static const struct export_operations ext4_export_ops = {
1030         .fh_to_dentry = ext4_fh_to_dentry,
1031         .fh_to_parent = ext4_fh_to_parent,
1032         .get_parent = ext4_get_parent,
1033 };
1034
     /*
      * Token ids for the ext4 mount options.  The tokens[] table maps option
      * strings to these ids and parse_options() switches on the id returned
      * by match_token().  Opt_err is the id carried by the NULL-pattern entry
      * at the end of tokens[].  Opt_ignore has a case in parse_options() but
      * no entry in tokens[].
      */
1035 enum {
1036         Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1037         Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
1038         Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
1039         Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
1040         Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
1041         Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
1042         Opt_journal_update, Opt_journal_dev,
1043         Opt_journal_checksum, Opt_journal_async_commit,
1044         Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1045         Opt_data_err_abort, Opt_data_err_ignore,
1046         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1047         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1048         Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
1049         Opt_usrquota, Opt_grpquota, Opt_i_version,
1050         Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1051         Opt_inode_readahead_blks, Opt_journal_ioprio
1052 };
1053
     /*
      * Mount option patterns that parse_options() hands to match_token().
      * Some ids appear more than once, either to give an option an alias
      * (grpid/bsdgroups, nogrpid/sysvgroups) or to accept it with and
      * without an argument (barrier, auto_da_alloc).  The table ends with
      * the {Opt_err, NULL} entry.
      *
      * NOTE(review): entry order looks significant - "usrjquota=" and
      * "grpjquota=" are listed ahead of their "%s" forms, presumably so an
      * empty value selects the Opt_off* id.  Confirm against match_token()
      * before reordering anything here.
      *
      * NOTE(review): "resize" has no %u capture, yet the Opt_resize case in
      * parse_options() calls match_int(&args[0], ...).  Confirm that args[0]
      * is actually populated for that option.
      */
1054 static const match_table_t tokens = {
1055         {Opt_bsd_df, "bsddf"},
1056         {Opt_minix_df, "minixdf"},
1057         {Opt_grpid, "grpid"},
1058         {Opt_grpid, "bsdgroups"},
1059         {Opt_nogrpid, "nogrpid"},
1060         {Opt_nogrpid, "sysvgroups"},
1061         {Opt_resgid, "resgid=%u"},
1062         {Opt_resuid, "resuid=%u"},
1063         {Opt_sb, "sb=%u"},
1064         {Opt_err_cont, "errors=continue"},
1065         {Opt_err_panic, "errors=panic"},
1066         {Opt_err_ro, "errors=remount-ro"},
1067         {Opt_nouid32, "nouid32"},
1068         {Opt_debug, "debug"},
1069         {Opt_oldalloc, "oldalloc"},
1070         {Opt_orlov, "orlov"},
1071         {Opt_user_xattr, "user_xattr"},
1072         {Opt_nouser_xattr, "nouser_xattr"},
1073         {Opt_acl, "acl"},
1074         {Opt_noacl, "noacl"},
1075         {Opt_noload, "noload"},
1076         {Opt_nobh, "nobh"},
1077         {Opt_bh, "bh"},
1078         {Opt_commit, "commit=%u"},
1079         {Opt_min_batch_time, "min_batch_time=%u"},
1080         {Opt_max_batch_time, "max_batch_time=%u"},
1081         {Opt_journal_update, "journal=update"},
1082         {Opt_journal_dev, "journal_dev=%u"},
1083         {Opt_journal_checksum, "journal_checksum"},
1084         {Opt_journal_async_commit, "journal_async_commit"},
1085         {Opt_abort, "abort"},
1086         {Opt_data_journal, "data=journal"},
1087         {Opt_data_ordered, "data=ordered"},
1088         {Opt_data_writeback, "data=writeback"},
1089         {Opt_data_err_abort, "data_err=abort"},
1090         {Opt_data_err_ignore, "data_err=ignore"},
1091         {Opt_offusrjquota, "usrjquota="},
1092         {Opt_usrjquota, "usrjquota=%s"},
1093         {Opt_offgrpjquota, "grpjquota="},
1094         {Opt_grpjquota, "grpjquota=%s"},
1095         {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1096         {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1097         {Opt_grpquota, "grpquota"},
1098         {Opt_noquota, "noquota"},
1099         {Opt_quota, "quota"},
1100         {Opt_usrquota, "usrquota"},
1101         {Opt_barrier, "barrier=%u"},
1102         {Opt_barrier, "barrier"},
1103         {Opt_nobarrier, "nobarrier"},
1104         {Opt_i_version, "i_version"},
1105         {Opt_stripe, "stripe=%u"},
1106         {Opt_resize, "resize"},
1107         {Opt_delalloc, "delalloc"},
1108         {Opt_nodelalloc, "nodelalloc"},
1109         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1110         {Opt_journal_ioprio, "journal_ioprio=%u"},
1111         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1112         {Opt_auto_da_alloc, "auto_da_alloc"},
1113         {Opt_noauto_da_alloc, "noauto_da_alloc"},
1114         {Opt_err, NULL},
1115 };
1116
1117 static ext4_fsblk_t get_sb_block(void **data)
1118 {
1119         ext4_fsblk_t    sb_block;
1120         char            *options = (char *) *data;
1121
1122         if (!options || strncmp(options, "sb=", 3) != 0)
1123                 return 1;       /* Default location */
1124         options += 3;
1125         /*todo: use simple_strtoll with >32bit ext4 */
1126         sb_block = simple_strtoul(options, &options, 0);
1127         if (*options && *options != ',') {
1128                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1129                        (char *) *data);
1130                 return 1;
1131         }
1132         if (*options == ',')
1133                 options++;
1134         *data = (void *) options;
1135         return sb_block;
1136 }
1137
     /*
      * IOPRIO_PRIO_VALUE() of class IOPRIO_CLASS_BE and value 3.  Presumably
      * the journal I/O priority used when no journal_ioprio= option is given
      * - confirm at the point of use, which is outside this part of the file.
      */
1138 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1139
1140 static int parse_options(char *options, struct super_block *sb,
1141                          unsigned long *journal_devnum,
1142                          unsigned int *journal_ioprio,
1143                          ext4_fsblk_t *n_blocks_count, int is_remount)
1144 {
1145         struct ext4_sb_info *sbi = EXT4_SB(sb);
1146         char *p;
1147         substring_t args[MAX_OPT_ARGS];
1148         int data_opt = 0;
1149         int option;
1150 #ifdef CONFIG_QUOTA
1151         int qtype, qfmt;
1152         char *qname;
1153 #endif
1154
1155         if (!options)
1156                 return 1;
1157
1158         while ((p = strsep(&options, ",")) != NULL) {
1159                 int token;
1160                 if (!*p)
1161                         continue;
1162
1163                 token = match_token(p, tokens, args);
1164                 switch (token) {
1165                 case Opt_bsd_df:
1166                         clear_opt(sbi->s_mount_opt, MINIX_DF);
1167                         break;
1168                 case Opt_minix_df:
1169                         set_opt(sbi->s_mount_opt, MINIX_DF);
1170                         break;
1171                 case Opt_grpid:
1172                         set_opt(sbi->s_mount_opt, GRPID);
1173                         break;
1174                 case Opt_nogrpid:
1175                         clear_opt(sbi->s_mount_opt, GRPID);
1176                         break;
1177                 case Opt_resuid:
1178                         if (match_int(&args[0], &option))
1179                                 return 0;
1180                         sbi->s_resuid = option;
1181                         break;
1182                 case Opt_resgid:
1183                         if (match_int(&args[0], &option))
1184                                 return 0;
1185                         sbi->s_resgid = option;
1186                         break;
1187                 case Opt_sb:
1188                         /* handled by get_sb_block() instead of here */
1189                         /* *sb_block = match_int(&args[0]); */
1190                         break;
1191                 case Opt_err_panic:
1192                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1193                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1194                         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1195                         break;
1196                 case Opt_err_ro:
1197                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1198                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1199                         set_opt(sbi->s_mount_opt, ERRORS_RO);
1200                         break;
1201                 case Opt_err_cont:
1202                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1203                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1204                         set_opt(sbi->s_mount_opt, ERRORS_CONT);
1205                         break;
1206                 case Opt_nouid32:
1207                         set_opt(sbi->s_mount_opt, NO_UID32);
1208                         break;
1209                 case Opt_debug:
1210                         set_opt(sbi->s_mount_opt, DEBUG);
1211                         break;
1212                 case Opt_oldalloc:
1213                         set_opt(sbi->s_mount_opt, OLDALLOC);
1214                         break;
1215                 case Opt_orlov:
1216                         clear_opt(sbi->s_mount_opt, OLDALLOC);
1217                         break;
1218 #ifdef CONFIG_EXT4_FS_XATTR
1219                 case Opt_user_xattr:
1220                         set_opt(sbi->s_mount_opt, XATTR_USER);
1221                         break;
1222                 case Opt_nouser_xattr:
1223                         clear_opt(sbi->s_mount_opt, XATTR_USER);
1224                         break;
1225 #else
1226                 case Opt_user_xattr:
1227                 case Opt_nouser_xattr:
1228                         printk(KERN_ERR "EXT4 (no)user_xattr options "
1229                                "not supported\n");
1230                         break;
1231 #endif
1232 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1233                 case Opt_acl:
1234                         set_opt(sbi->s_mount_opt, POSIX_ACL);
1235                         break;
1236                 case Opt_noacl:
1237                         clear_opt(sbi->s_mount_opt, POSIX_ACL);
1238                         break;
1239 #else
1240                 case Opt_acl:
1241                 case Opt_noacl:
1242                         printk(KERN_ERR "EXT4 (no)acl options "
1243                                "not supported\n");
1244                         break;
1245 #endif
1246                 case Opt_journal_update:
1247                         /* @@@ FIXME */
1248                         /* Eventually we will want to be able to create
1249                            a journal file here.  For now, only allow the
1250                            user to specify an existing inode to be the
1251                            journal file. */
1252                         if (is_remount) {
1253                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1254                                        "journal on remount\n");
1255                                 return 0;
1256                         }
1257                         set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1258                         break;
1259                 case Opt_journal_dev:
1260                         if (is_remount) {
1261                                 printk(KERN_ERR "EXT4-fs: cannot specify "
1262                                        "journal on remount\n");
1263                                 return 0;
1264                         }
1265                         if (match_int(&args[0], &option))
1266                                 return 0;
1267                         *journal_devnum = option;
1268                         break;
1269                 case Opt_journal_checksum:
1270                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1271                         break;
1272                 case Opt_journal_async_commit:
1273                         set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1274                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1275                         break;
1276                 case Opt_noload:
1277                         set_opt(sbi->s_mount_opt, NOLOAD);
1278                         break;
1279                 case Opt_commit:
1280                         if (match_int(&args[0], &option))
1281                                 return 0;
1282                         if (option < 0)
1283                                 return 0;
1284                         if (option == 0)
1285                                 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1286                         sbi->s_commit_interval = HZ * option;
1287                         break;
1288                 case Opt_max_batch_time:
1289                         if (match_int(&args[0], &option))
1290                                 return 0;
1291                         if (option < 0)
1292                                 return 0;
1293                         if (option == 0)
1294                                 option = EXT4_DEF_MAX_BATCH_TIME;
1295                         sbi->s_max_batch_time = option;
1296                         break;
1297                 case Opt_min_batch_time:
1298                         if (match_int(&args[0], &option))
1299                                 return 0;
1300                         if (option < 0)
1301                                 return 0;
1302                         sbi->s_min_batch_time = option;
1303                         break;
1304                 case Opt_data_journal:
1305                         data_opt = EXT4_MOUNT_JOURNAL_DATA;
1306                         goto datacheck;
1307                 case Opt_data_ordered:
1308                         data_opt = EXT4_MOUNT_ORDERED_DATA;
1309                         goto datacheck;
1310                 case Opt_data_writeback:
1311                         data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1312                 datacheck:
1313                         if (is_remount) {
1314                                 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1315                                                 != data_opt) {
1316                                         printk(KERN_ERR
1317                                                 "EXT4-fs: cannot change data "
1318                                                 "mode on remount\n");
1319                                         return 0;
1320                                 }
1321                         } else {
1322                                 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1323                                 sbi->s_mount_opt |= data_opt;
1324                         }
1325                         break;
1326                 case Opt_data_err_abort:
1327                         set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1328                         break;
1329                 case Opt_data_err_ignore:
1330                         clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1331                         break;
1332 #ifdef CONFIG_QUOTA
1333                 case Opt_usrjquota:
1334                         qtype = USRQUOTA;
1335                         goto set_qf_name;
1336                 case Opt_grpjquota:
1337                         qtype = GRPQUOTA;
1338 set_qf_name:
1339                         if (sb_any_quota_loaded(sb) &&
1340                             !sbi->s_qf_names[qtype]) {
1341                                 printk(KERN_ERR
1342                                        "EXT4-fs: Cannot change journaled "
1343                                        "quota options when quota turned on.\n");
1344                                 return 0;
1345                         }
1346                         qname = match_strdup(&args[0]);
1347                         if (!qname) {
1348                                 printk(KERN_ERR
1349                                         "EXT4-fs: not enough memory for "
1350                                         "storing quotafile name.\n");
1351                                 return 0;
1352                         }
1353                         if (sbi->s_qf_names[qtype] &&
1354                             strcmp(sbi->s_qf_names[qtype], qname)) {
1355                                 printk(KERN_ERR
1356                                         "EXT4-fs: %s quota file already "
1357                                         "specified.\n", QTYPE2NAME(qtype));
1358                                 kfree(qname);
1359                                 return 0;
1360                         }
1361                         sbi->s_qf_names[qtype] = qname;
1362                         if (strchr(sbi->s_qf_names[qtype], '/')) {
1363                                 printk(KERN_ERR
1364                                         "EXT4-fs: quotafile must be on "
1365                                         "filesystem root.\n");
1366                                 kfree(sbi->s_qf_names[qtype]);
1367                                 sbi->s_qf_names[qtype] = NULL;
1368                                 return 0;
1369                         }
1370                         set_opt(sbi->s_mount_opt, QUOTA);
1371                         break;
1372                 case Opt_offusrjquota:
1373                         qtype = USRQUOTA;
1374                         goto clear_qf_name;
1375                 case Opt_offgrpjquota:
1376                         qtype = GRPQUOTA;
1377 clear_qf_name:
1378                         if (sb_any_quota_loaded(sb) &&
1379                             sbi->s_qf_names[qtype]) {
1380                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1381                                         "journaled quota options when "
1382                                         "quota turned on.\n");
1383                                 return 0;
1384                         }
1385                         /*
1386                          * The space will be released later when all options
1387                          * are confirmed to be correct
1388                          */
1389                         sbi->s_qf_names[qtype] = NULL;
1390                         break;
1391                 case Opt_jqfmt_vfsold:
1392                         qfmt = QFMT_VFS_OLD;
1393                         goto set_qf_format;
1394                 case Opt_jqfmt_vfsv0:
1395                         qfmt = QFMT_VFS_V0;
1396 set_qf_format:
1397                         if (sb_any_quota_loaded(sb) &&
1398                             sbi->s_jquota_fmt != qfmt) {
1399                                 printk(KERN_ERR "EXT4-fs: Cannot change "
1400                                         "journaled quota options when "
1401                                         "quota turned on.\n");
1402                                 return 0;
1403                         }
1404                         sbi->s_jquota_fmt = qfmt;
1405                         break;
1406                 case Opt_quota:
1407                 case Opt_usrquota:
1408                         set_opt(sbi->s_mount_opt, QUOTA);
1409                         set_opt(sbi->s_mount_opt, USRQUOTA);
1410                         break;
1411                 case Opt_grpquota:
1412                         set_opt(sbi->s_mount_opt, QUOTA);
1413                         set_opt(sbi->s_mount_opt, GRPQUOTA);
1414                         break;
1415                 case Opt_noquota:
1416                         if (sb_any_quota_loaded(sb)) {
1417                                 printk(KERN_ERR "EXT4-fs: Cannot change quota "
1418                                         "options when quota turned on.\n");
1419                                 return 0;
1420                         }
1421                         clear_opt(sbi->s_mount_opt, QUOTA);
1422                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1423                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1424                         break;
1425 #else
1426                 case Opt_quota:
1427                 case Opt_usrquota:
1428                 case Opt_grpquota:
1429                         printk(KERN_ERR
1430                                 "EXT4-fs: quota options not supported.\n");
1431                         break;
1432                 case Opt_usrjquota:
1433                 case Opt_grpjquota:
1434                 case Opt_offusrjquota:
1435                 case Opt_offgrpjquota:
1436                 case Opt_jqfmt_vfsold:
1437                 case Opt_jqfmt_vfsv0:
1438                         printk(KERN_ERR
1439                                 "EXT4-fs: journaled quota options not "
1440                                 "supported.\n");
1441                         break;
1442                 case Opt_noquota:
1443                         break;
1444 #endif
1445                 case Opt_abort:
1446                         set_opt(sbi->s_mount_opt, ABORT);
1447                         break;
1448                 case Opt_nobarrier:
1449                         clear_opt(sbi->s_mount_opt, BARRIER);
1450                         break;
1451                 case Opt_barrier:
1452                         if (match_int(&args[0], &option)) {
1453                                 set_opt(sbi->s_mount_opt, BARRIER);
1454                                 break;
1455                         }
1456                         if (option)
1457                                 set_opt(sbi->s_mount_opt, BARRIER);
1458                         else
1459                                 clear_opt(sbi->s_mount_opt, BARRIER);
1460                         break;
1461                 case Opt_ignore:
1462                         break;
1463                 case Opt_resize:
1464                         if (!is_remount) {
1465                                 printk("EXT4-fs: resize option only available "
1466                                         "for remount\n");
1467                                 return 0;
1468                         }
1469                         if (match_int(&args[0], &option) != 0)
1470                                 return 0;
1471                         *n_blocks_count = option;
1472                         break;
1473                 case Opt_nobh:
1474                         set_opt(sbi->s_mount_opt, NOBH);
1475                         break;
1476                 case Opt_bh:
1477                         clear_opt(sbi->s_mount_opt, NOBH);
1478                         break;
1479                 case Opt_i_version:
1480                         set_opt(sbi->s_mount_opt, I_VERSION);
1481                         sb->s_flags |= MS_I_VERSION;
1482                         break;
1483                 case Opt_nodelalloc:
1484                         clear_opt(sbi->s_mount_opt, DELALLOC);
1485                         break;
1486                 case Opt_stripe:
1487                         if (match_int(&args[0], &option))
1488                                 return 0;
1489                         if (option < 0)
1490                                 return 0;
1491                         sbi->s_stripe = option;
1492                         break;
1493                 case Opt_delalloc:
1494                         set_opt(sbi->s_mount_opt, DELALLOC);
1495                         break;
1496                 case Opt_inode_readahead_blks:
1497                         if (match_int(&args[0], &option))
1498                                 return 0;
1499                         if (option < 0 || option > (1 << 30))
1500                                 return 0;
1501                         if (!is_power_of_2(option)) {
1502                                 printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
1503                                        " must be a power of 2\n");
1504                                 return 0;
1505                         }
1506                         sbi->s_inode_readahead_blks = option;
1507                         break;
1508                 case Opt_journal_ioprio:
1509                         if (match_int(&args[0], &option))
1510                                 return 0;
1511                         if (option < 0 || option > 7)
1512                                 break;
1513                         *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1514                                                             option);
1515                         break;
1516                 case Opt_noauto_da_alloc:
1517                         set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1518                         break;
1519                 case Opt_auto_da_alloc:
1520                         if (match_int(&args[0], &option)) {
1521                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1522                                 break;
1523                         }
1524                         if (option)
1525                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1526                         else
1527                                 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1528                         break;
1529                 default:
1530                         printk(KERN_ERR
1531                                "EXT4-fs: Unrecognized mount option \"%s\" "
1532                                "or missing value\n", p);
1533                         return 0;
1534                 }
1535         }
1536 #ifdef CONFIG_QUOTA
1537         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1538                 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1539                      sbi->s_qf_names[USRQUOTA])
1540                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1541
1542                 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1543                      sbi->s_qf_names[GRPQUOTA])
1544                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1545
1546                 if ((sbi->s_qf_names[USRQUOTA] &&
1547                                 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1548                     (sbi->s_qf_names[GRPQUOTA] &&
1549                                 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1550                         printk(KERN_ERR "EXT4-fs: old and new quota "
1551                                         "format mixing.\n");
1552                         return 0;
1553                 }
1554
1555                 if (!sbi->s_jquota_fmt) {
1556                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1557                                         "not specified.\n");
1558                         return 0;
1559                 }
1560         } else {
1561                 if (sbi->s_jquota_fmt) {
1562                         printk(KERN_ERR "EXT4-fs: journaled quota format "
1563                                         "specified with no journaling "
1564                                         "enabled.\n");
1565                         return 0;
1566                 }
1567         }
1568 #endif
1569         return 1;
1570 }
1571
1572 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1573                             int read_only)
1574 {
1575         struct ext4_sb_info *sbi = EXT4_SB(sb);
1576         int res = 0;
1577
1578         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1579                 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1580                        "forcing read-only mode\n");
1581                 res = MS_RDONLY;
1582         }
1583         if (read_only)
1584                 return res;
1585         if (!(sbi->s_mount_state & EXT4_VALID_FS))
1586                 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1587                        "running e2fsck is recommended\n");
1588         else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1589                 printk(KERN_WARNING
1590                        "EXT4-fs warning: mounting fs with errors, "
1591                        "running e2fsck is recommended\n");
1592         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1593                  le16_to_cpu(es->s_mnt_count) >=
1594                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1595                 printk(KERN_WARNING
1596                        "EXT4-fs warning: maximal mount count reached, "
1597                        "running e2fsck is recommended\n");
1598         else if (le32_to_cpu(es->s_checkinterval) &&
1599                 (le32_to_cpu(es->s_lastcheck) +
1600                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1601                 printk(KERN_WARNING
1602                        "EXT4-fs warning: checktime reached, "
1603                        "running e2fsck is recommended\n");
1604         if (!sbi->s_journal)
1605                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1606         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1607                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1608         le16_add_cpu(&es->s_mnt_count, 1);
1609         es->s_mtime = cpu_to_le32(get_seconds());
1610         ext4_update_dynamic_rev(sb);
1611         if (sbi->s_journal)
1612                 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1613
1614         ext4_commit_super(sb, 1);
1615         if (test_opt(sb, DEBUG))
1616                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1617                                 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1618                         sb->s_blocksize,
1619                         sbi->s_groups_count,
1620                         EXT4_BLOCKS_PER_GROUP(sb),
1621                         EXT4_INODES_PER_GROUP(sb),
1622                         sbi->s_mount_opt);
1623
1624         if (EXT4_SB(sb)->s_journal) {
1625                 printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1626                        sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1627                        "external", EXT4_SB(sb)->s_journal->j_devname);
1628         } else {
1629                 printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1630         }
1631         return res;
1632 }
1633
1634 static int ext4_fill_flex_info(struct super_block *sb)
1635 {
1636         struct ext4_sb_info *sbi = EXT4_SB(sb);
1637         struct ext4_group_desc *gdp = NULL;
1638         struct buffer_head *bh;
1639         ext4_group_t flex_group_count;
1640         ext4_group_t flex_group;
1641         int groups_per_flex = 0;
1642         size_t size;
1643         int i;
1644
1645         if (!sbi->s_es->s_log_groups_per_flex) {
1646                 sbi->s_log_groups_per_flex = 0;
1647                 return 1;
1648         }
1649
1650         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1651         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1652
1653         /* We allocate both existing and potentially added groups */
1654         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1655                         ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1656                               EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1657         size = flex_group_count * sizeof(struct flex_groups);
1658         sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1659         if (sbi->s_flex_groups == NULL) {
1660                 sbi->s_flex_groups = vmalloc(size);
1661                 if (sbi->s_flex_groups)
1662                         memset(sbi->s_flex_groups, 0, size);
1663         }
1664         if (sbi->s_flex_groups == NULL) {
1665                 printk(KERN_ERR "EXT4-fs: not enough memory for "
1666                                 "%u flex groups\n", flex_group_count);
1667                 goto failed;
1668         }
1669
1670         for (i = 0; i < sbi->s_groups_count; i++) {
1671                 gdp = ext4_get_group_desc(sb, i, &bh);
1672
1673                 flex_group = ext4_flex_group(sbi, i);
1674                 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1675                            ext4_free_inodes_count(sb, gdp));
1676                 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1677                            ext4_free_blks_count(sb, gdp));
1678                 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1679                            ext4_used_dirs_count(sb, gdp));
1680         }
1681
1682         return 1;
1683 failed:
1684         return 0;
1685 }
1686
1687 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1688                             struct ext4_group_desc *gdp)
1689 {
1690         __u16 crc = 0;
1691
1692         if (sbi->s_es->s_feature_ro_compat &
1693             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1694                 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1695                 __le32 le_group = cpu_to_le32(block_group);
1696
1697                 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1698                 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1699                 crc = crc16(crc, (__u8 *)gdp, offset);
1700                 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1701                 /* for checksum of struct ext4_group_desc do the rest...*/
1702                 if ((sbi->s_es->s_feature_incompat &
1703                      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1704                     offset < le16_to_cpu(sbi->s_es->s_desc_size))
1705                         crc = crc16(crc, (__u8 *)gdp + offset,
1706                                     le16_to_cpu(sbi->s_es->s_desc_size) -
1707                                         offset);
1708         }
1709
1710         return cpu_to_le16(crc);
1711 }
1712
1713 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1714                                 struct ext4_group_desc *gdp)
1715 {
1716         if ((sbi->s_es->s_feature_ro_compat &
1717              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1718             (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1719                 return 0;
1720
1721         return 1;
1722 }
1723
1724 /* Called at mount-time, super-block is locked */
1725 static int ext4_check_descriptors(struct super_block *sb)
1726 {
1727         struct ext4_sb_info *sbi = EXT4_SB(sb);
1728         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1729         ext4_fsblk_t last_block;
1730         ext4_fsblk_t block_bitmap;
1731         ext4_fsblk_t inode_bitmap;
1732         ext4_fsblk_t inode_table;
1733         int flexbg_flag = 0;
1734         ext4_group_t i;
1735
1736         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1737                 flexbg_flag = 1;
1738
1739         ext4_debug("Checking group descriptors");
1740
1741         for (i = 0; i < sbi->s_groups_count; i++) {
1742                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1743
1744                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1745                         last_block = ext4_blocks_count(sbi->s_es) - 1;
1746                 else
1747                         last_block = first_block +
1748                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1749
1750                 block_bitmap = ext4_block_bitmap(sb, gdp);
1751                 if (block_bitmap < first_block || block_bitmap > last_block) {
1752                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1753                                "Block bitmap for group %u not in group "
1754                                "(block %llu)!\n", i, block_bitmap);
1755                         return 0;
1756                 }
1757                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1758                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1759                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1760                                "Inode bitmap for group %u not in group "
1761                                "(block %llu)!\n", i, inode_bitmap);
1762                         return 0;
1763                 }
1764                 inode_table = ext4_inode_table(sb, gdp);
1765                 if (inode_table < first_block ||
1766                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
1767                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1768                                "Inode table for group %u not in group "
1769                                "(block %llu)!\n", i, inode_table);
1770                         return 0;
1771                 }
1772                 spin_lock(sb_bgl_lock(sbi, i));
1773                 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1774                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1775                                "Checksum for group %u failed (%u!=%u)\n",
1776                                i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1777                                gdp)), le16_to_cpu(gdp->bg_checksum));
1778                         if (!(sb->s_flags & MS_RDONLY)) {
1779                                 spin_unlock(sb_bgl_lock(sbi, i));
1780                                 return 0;
1781                         }
1782                 }
1783                 spin_unlock(sb_bgl_lock(sbi, i));
1784                 if (!flexbg_flag)
1785                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
1786         }
1787
1788         ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1789         sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1790         return 1;
1791 }
1792
1793 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1794  * the superblock) which were deleted from all directories, but held open by
1795  * a process at the time of a crash.  We walk the list and try to delete these
1796  * inodes at recovery time (only with a read-write filesystem).
1797  *
1798  * In order to keep the orphan inode chain consistent during traversal (in
1799  * case of crash during recovery), we link each inode into the superblock
1800  * orphan list_head and handle it the same way as an inode deletion during
1801  * normal operation (which journals the operations for us).
1802  *
1803  * We only do an iget() and an iput() on each inode, which is very safe if we
1804  * accidentally point at an in-use or already deleted inode.  The worst that
1805  * can happen in this case is that we get a "bit already cleared" message from
1806  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1807  * e2fsck was run on this filesystem, and it must have already done the orphan
1808  * inode cleanup for us, so we can safely abort without any further action.
1809  */
1810 static void ext4_orphan_cleanup(struct super_block *sb,
1811                                 struct ext4_super_block *es)
1812 {
1813         unsigned int s_flags = sb->s_flags;
1814         int nr_orphans = 0, nr_truncates = 0;
1815 #ifdef CONFIG_QUOTA
1816         int i;
1817 #endif
1818         if (!es->s_last_orphan) {
1819                 jbd_debug(4, "no orphan inodes to clean up\n");
1820                 return;
1821         }
1822
1823         if (bdev_read_only(sb->s_bdev)) {
1824                 printk(KERN_ERR "EXT4-fs: write access "
1825                         "unavailable, skipping orphan cleanup.\n");
1826                 return;
1827         }
1828
1829         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1830                 if (es->s_last_orphan)
1831                         jbd_debug(1, "Errors on filesystem, "
1832                                   "clearing orphan list.\n");
1833                 es->s_last_orphan = 0;
1834                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1835                 return;
1836         }
1837
1838         if (s_flags & MS_RDONLY) {
1839                 printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1840                        sb->s_id);
1841                 sb->s_flags &= ~MS_RDONLY;
1842         }
1843 #ifdef CONFIG_QUOTA
1844         /* Needed for iput() to work correctly and not trash data */
1845         sb->s_flags |= MS_ACTIVE;
1846         /* Turn on quotas so that they are updated correctly */
1847         for (i = 0; i < MAXQUOTAS; i++) {
1848                 if (EXT4_SB(sb)->s_qf_names[i]) {
1849                         int ret = ext4_quota_on_mount(sb, i);
1850                         if (ret < 0)
1851                                 printk(KERN_ERR
1852                                         "EXT4-fs: Cannot turn on journaled "
1853                                         "quota: error %d\n", ret);
1854                 }
1855         }
1856 #endif
1857
1858         while (es->s_last_orphan) {
1859                 struct inode *inode;
1860
1861                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1862                 if (IS_ERR(inode)) {
1863                         es->s_last_orphan = 0;
1864                         break;
1865                 }
1866
1867                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1868                 vfs_dq_init(inode);
1869                 if (inode->i_nlink) {
1870                         printk(KERN_DEBUG
1871                                 "%s: truncating inode %lu to %lld bytes\n",
1872                                 __func__, inode->i_ino, inode->i_size);
1873                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1874                                   inode->i_ino, inode->i_size);
1875                         ext4_truncate(inode);
1876                         nr_truncates++;
1877                 } else {
1878                         printk(KERN_DEBUG
1879                                 "%s: deleting unreferenced inode %lu\n",
1880                                 __func__, inode->i_ino);
1881                         jbd_debug(2, "deleting unreferenced inode %lu\n",
1882                                   inode->i_ino);
1883                         nr_orphans++;
1884                 }
1885                 iput(inode);  /* The delete magic happens here! */
1886         }
1887
1888 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1889
1890         if (nr_orphans)
1891                 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1892                        sb->s_id, PLURAL(nr_orphans));
1893         if (nr_truncates)
1894                 printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1895                        sb->s_id, PLURAL(nr_truncates));
1896 #ifdef CONFIG_QUOTA
1897         /* Turn quotas off */
1898         for (i = 0; i < MAXQUOTAS; i++) {
1899                 if (sb_dqopt(sb)->files[i])
1900                         vfs_quota_off(sb, i, 0);
1901         }
1902 #endif
1903         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1904 }
1905 /*
1906  * Maximal extent format file size.
1907  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1908  * extent format containers, within a sector_t, and within i_blocks
1909  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1910  * so that won't be a limiting factor.
1911  *
1912  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1913  */
1914 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1915 {
1916         loff_t res;
1917         loff_t upper_limit = MAX_LFS_FILESIZE;
1918
1919         /* small i_blocks in vfs inode? */
1920         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1921                 /*
1922                  * CONFIG_LBD is not enabled implies the inode
1923                  * i_block represent total blocks in 512 bytes
1924                  * 32 == size of vfs inode i_blocks * 8
1925                  */
1926                 upper_limit = (1LL << 32) - 1;
1927
1928                 /* total blocks in file system block size */
1929                 upper_limit >>= (blkbits - 9);
1930                 upper_limit <<= blkbits;
1931         }
1932
1933         /* 32-bit extent-start container, ee_block */
1934         res = 1LL << 32;
1935         res <<= blkbits;
1936         res -= 1;
1937
1938         /* Sanity check against vm- & vfs- imposed limits */
1939         if (res > upper_limit)
1940                 res = upper_limit;
1941
1942         return res;
1943 }
1944
1945 /*
1946  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1947  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1948  * We need to be 1 filesystem block less than the 2^48 sector limit.
1949  */
1950 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1951 {
1952         loff_t res = EXT4_NDIR_BLOCKS;
1953         int meta_blocks;
1954         loff_t upper_limit;
1955         /* This is calculated to be the largest file size for a
1956          * dense, bitmapped file such that the total number of
1957          * sectors in the file, including data and all indirect blocks,
1958          * does not exceed 2^48 -1
1959          * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1960          * total number of  512 bytes blocks of the file
1961          */
1962
1963         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1964                 /*
1965                  * !has_huge_files or CONFIG_LBD is not enabled
1966                  * implies the inode i_block represent total blocks in
1967                  * 512 bytes 32 == size of vfs inode i_blocks * 8
1968                  */
1969                 upper_limit = (1LL << 32) - 1;
1970
1971                 /* total blocks in file system block size */
1972                 upper_limit >>= (bits - 9);
1973
1974         } else {
1975                 /*
1976                  * We use 48 bit ext4_inode i_blocks
1977                  * With EXT4_HUGE_FILE_FL set the i_blocks
1978                  * represent total number of blocks in
1979                  * file system block size
1980                  */
1981                 upper_limit = (1LL << 48) - 1;
1982
1983         }
1984
1985         /* indirect blocks */
1986         meta_blocks = 1;
1987         /* double indirect blocks */
1988         meta_blocks += 1 + (1LL << (bits-2));
1989         /* tripple indirect blocks */
1990         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1991
1992         upper_limit -= meta_blocks;
1993         upper_limit <<= bits;
1994
1995         res += 1LL << (bits-2);
1996         res += 1LL << (2*(bits-2));
1997         res += 1LL << (3*(bits-2));
1998         res <<= bits;
1999         if (res > upper_limit)
2000                 res = upper_limit;
2001
2002         if (res > MAX_LFS_FILESIZE)
2003                 res = MAX_LFS_FILESIZE;
2004
2005         return res;
2006 }
2007
2008 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2009                                 ext4_fsblk_t logical_sb_block, int nr)
2010 {
2011         struct ext4_sb_info *sbi = EXT4_SB(sb);
2012         ext4_group_t bg, first_meta_bg;
2013         int has_super = 0;
2014
2015         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2016
2017         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2018             nr < first_meta_bg)
2019                 return logical_sb_block + nr + 1;
2020         bg = sbi->s_desc_per_block * nr;
2021         if (ext4_bg_has_super(sb, bg))
2022                 has_super = 1;
2023         return (has_super + ext4_group_first_block_no(sb, bg));
2024 }
2025
2026 /**
2027  * ext4_get_stripe_size: Get the stripe size.
2028  * @sbi: In memory super block info
2029  *
2030  * If we have specified it via mount option, then
2031  * use the mount option value. If the value specified at mount time is
2032  * greater than the blocks per group use the super block value.
2033  * If the super block value is greater than blocks per group return 0.
2034  * Allocator needs it be less than blocks per group.
2035  *
2036  */
2037 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2038 {
2039         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2040         unsigned long stripe_width =
2041                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2042
2043         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2044                 return sbi->s_stripe;
2045
2046         if (stripe_width <= sbi->s_blocks_per_group)
2047                 return stripe_width;
2048
2049         if (stride <= sbi->s_blocks_per_group)
2050                 return stride;
2051
2052         return 0;
2053 }
2054
/* sysfs support */
2056
/*
 * One ext4 sysfs attribute: the generic attribute plus the ext4-specific
 * callbacks that ext4_attr_show()/ext4_attr_store() dispatch to.
 */
struct ext4_attr {
        struct attribute attr;
        /* formats the value into the buffer; may be NULL */
        ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
        /* parses the buffer of the given length; may be NULL */
        ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
                         const char *, size_t);
        /* byte offset of the backing field inside struct ext4_sb_info */
        int offset;
};
2064
2065 static int parse_strtoul(const char *buf,
2066                 unsigned long max, unsigned long *value)
2067 {
2068         char *endp;
2069
2070         while (*buf && isspace(*buf))
2071                 buf++;
2072         *value = simple_strtoul(buf, &endp, 0);
2073         while (*endp && isspace(*endp))
2074                 endp++;
2075         if (*endp || *value > max)
2076                 return -EINVAL;
2077
2078         return 0;
2079 }
2080
2081 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2082                                               struct ext4_sb_info *sbi,
2083                                               char *buf)
2084 {
2085         return snprintf(buf, PAGE_SIZE, "%llu\n",
2086                         (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2087 }
2088
2089 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2090                                          struct ext4_sb_info *sbi, char *buf)
2091 {
2092         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2093
2094         return snprintf(buf, PAGE_SIZE, "%lu\n",
2095                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2096                          sbi->s_sectors_written_start) >> 1);
2097 }
2098
2099 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2100                                           struct ext4_sb_info *sbi, char *buf)
2101 {
2102         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2103
2104         return snprintf(buf, PAGE_SIZE, "%llu\n",
2105                         sbi->s_kbytes_written +
2106                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2107                           EXT4_SB(sb)->s_sectors_written_start) >> 1));
2108 }
2109
2110 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2111                                           struct ext4_sb_info *sbi,
2112                                           const char *buf, size_t count)
2113 {
2114         unsigned long t;
2115
2116         if (parse_strtoul(buf, 0x40000000, &t))
2117                 return -EINVAL;
2118
2119         if (!is_power_of_2(t))
2120                 return -EINVAL;
2121
2122         sbi->s_inode_readahead_blks = t;
2123         return count;
2124 }
2125
2126 static ssize_t sbi_ui_show(struct ext4_attr *a,
2127                                 struct ext4_sb_info *sbi, char *buf)
2128 {
2129         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2130
2131         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2132 }
2133
2134 static ssize_t sbi_ui_store(struct ext4_attr *a,
2135                             struct ext4_sb_info *sbi,
2136                             const char *buf, size_t count)
2137 {
2138         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2139         unsigned long t;
2140
2141         if (parse_strtoul(buf, 0xffffffff, &t))
2142                 return -EINVAL;
2143         *ui = t;
2144         return count;
2145 }
2146
/*
 * Define ext4_attr_<_name> with explicit callbacks and with ->offset set
 * to the position of _elname inside struct ext4_sb_info.
 */
#define EXT4_ATTR_OFFSET(_name, _mode, _show, _store, _elname)  \
static struct ext4_attr ext4_attr_##_name = {                   \
        .attr = {.name = __stringify(_name), .mode = _mode },   \
        .show   = _show,                                        \
        .store  = _store,                                       \
        .offset = offsetof(struct ext4_sb_info, _elname),       \
}

/* Define ext4_attr_<_name> without a backing-field offset. */
#define EXT4_ATTR(_name, _mode, _show, _store) \
static struct ext4_attr ext4_attr_##_name = __ATTR(_name, _mode, _show, _store)

/* Read-only attribute served by <_name>_show(). */
#define EXT4_RO_ATTR(_name) EXT4_ATTR(_name, 0444, _name##_show, NULL)
/* Read-write attribute served by <_name>_show() and <_name>_store(). */
#define EXT4_RW_ATTR(_name) EXT4_ATTR(_name, 0644, _name##_show, _name##_store)
/* Read-write attribute backed by an unsigned int member of ext4_sb_info. */
#define EXT4_RW_ATTR_SBI_UI(_name, _elname)     \
        EXT4_ATTR_OFFSET(_name, 0644, sbi_ui_show, sbi_ui_store, _elname)
/* Address of the embedded struct attribute, for the attribute array. */
#define ATTR_LIST(_name) &ext4_attr_##_name.attr
2162
/* Read-only attributes, each served by its own <name>_show() function. */
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
/*
 * inode_readahead_blks is shown with the generic unsigned int helper but
 * has its own store function.
 */
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
                 inode_readahead_blks_store, s_inode_readahead_blks);
/*
 * Read-write unsigned int members of ext4_sb_info.  Note that the
 * mb_order2_req and mb_stream_req attributes map to the s_mb_order2_reqs
 * and s_mb_stream_request members respectively.
 */
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2174
/*
 * NULL-terminated list of all ext4 sysfs attributes; installed as the
 * default_attrs of ext4_ktype.
 */
static struct attribute *ext4_attrs[] = {
        ATTR_LIST(delayed_allocation_blocks),
        ATTR_LIST(session_write_kbytes),
        ATTR_LIST(lifetime_write_kbytes),
        ATTR_LIST(inode_readahead_blks),
        ATTR_LIST(mb_stats),
        ATTR_LIST(mb_max_to_scan),
        ATTR_LIST(mb_min_to_scan),
        ATTR_LIST(mb_order2_req),
        ATTR_LIST(mb_stream_req),
        ATTR_LIST(mb_group_prealloc),
        NULL,
};
2188
2189 static ssize_t ext4_attr_show(struct kobject *kobj,
2190                               struct attribute *attr, char *buf)
2191 {
2192         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2193                                                 s_kobj);
2194         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2195
2196         return a->show ? a->show(a, sbi, buf) : 0;
2197 }
2198
2199 static ssize_t ext4_attr_store(struct kobject *kobj,
2200                                struct attribute *attr,
2201                                const char *buf, size_t len)
2202 {
2203         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2204                                                 s_kobj);
2205         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2206
2207         return a->store ? a->store(a, sbi, buf, len) : 0;
2208 }
2209
2210 static void ext4_sb_release(struct kobject *kobj)
2211 {
2212         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2213                                                 s_kobj);
2214         complete(&sbi->s_kobj_unregister);
2215 }
2216
2217
2218 static struct sysfs_ops ext4_attr_ops = {
2219         .show   = ext4_attr_show,
2220         .store  = ext4_attr_store,
2221 };
2222
2223 static struct kobj_type ext4_ktype = {
2224         .default_attrs  = ext4_attrs,
2225         .sysfs_ops      = &ext4_attr_ops,
2226         .release        = ext4_sb_release,
2227 };
2228
2229 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2230                                 __releases(kernel_lock)
2231                                 __acquires(kernel_lock)
2232
2233 {
2234         struct buffer_head *bh;
2235         struct ext4_super_block *es = NULL;
2236         struct ext4_sb_info *sbi;
2237         ext4_fsblk_t block;
2238         ext4_fsblk_t sb_block = get_sb_block(&data);
2239         ext4_fsblk_t logical_sb_block;
2240         unsigned long offset = 0;
2241         unsigned long journal_devnum = 0;
2242         unsigned long def_mount_opts;
2243         struct inode *root;
2244         char *cp;
2245         const char *descr;
2246         int ret = -EINVAL;
2247         int blocksize;
2248         unsigned int db_count;
2249         unsigned int i;
2250         int needs_recovery, has_huge_files;
2251         int features;
2252         __u64 blocks_count;
2253         int err;
2254         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2255
2256         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2257         if (!sbi)
2258                 return -ENOMEM;
2259
2260         sbi->s_blockgroup_lock =
2261                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2262         if (!sbi->s_blockgroup_lock) {
2263                 kfree(sbi);
2264                 return -ENOMEM;
2265         }
2266         sb->s_fs_info = sbi;
2267         sbi->s_mount_opt = 0;
2268         sbi->s_resuid = EXT4_DEF_RESUID;
2269         sbi->s_resgid = EXT4_DEF_RESGID;
2270         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2271         sbi->s_sb_block = sb_block;
2272         sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2273                                                       sectors[1]);
2274
2275         unlock_kernel();
2276
2277         /* Cleanup superblock name */
2278         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2279                 *cp = '!';
2280
2281         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2282         if (!blocksize) {
2283                 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2284                 goto out_fail;
2285         }
2286
2287         /*
2288          * The ext4 superblock will not be buffer aligned for other than 1kB
2289          * block sizes.  We need to calculate the offset from buffer start.
2290          */
2291         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2292                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2293                 offset = do_div(logical_sb_block, blocksize);
2294         } else {
2295                 logical_sb_block = sb_block;
2296         }
2297
2298         if (!(bh = sb_bread(sb, logical_sb_block))) {
2299                 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2300                 goto out_fail;
2301         }
2302         /*
2303          * Note: s_es must be initialized as soon as possible because
2304          *       some ext4 macro-instructions depend on its value
2305          */
2306         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2307         sbi->s_es = es;
2308         sb->s_magic = le16_to_cpu(es->s_magic);
2309         if (sb->s_magic != EXT4_SUPER_MAGIC)
2310                 goto cantfind_ext4;
2311         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2312
2313         /* Set defaults before we parse the mount options */
2314         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2315         if (def_mount_opts & EXT4_DEFM_DEBUG)
2316                 set_opt(sbi->s_mount_opt, DEBUG);
2317         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2318                 set_opt(sbi->s_mount_opt, GRPID);
2319         if (def_mount_opts & EXT4_DEFM_UID16)
2320                 set_opt(sbi->s_mount_opt, NO_UID32);
2321 #ifdef CONFIG_EXT4_FS_XATTR
2322         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2323                 set_opt(sbi->s_mount_opt, XATTR_USER);
2324 #endif
2325 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2326         if (def_mount_opts & EXT4_DEFM_ACL)
2327                 set_opt(sbi->s_mount_opt, POSIX_ACL);
2328 #endif
2329         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2330                 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2331         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2332                 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2333         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2334                 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2335
2336         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2337                 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2338         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2339                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2340         else
2341                 set_opt(sbi->s_mount_opt, ERRORS_RO);
2342
2343         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2344         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2345         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2346         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2347         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2348
2349         set_opt(sbi->s_mount_opt, BARRIER);
2350
2351         /*
2352          * enable delayed allocation by default
2353          * Use -o nodelalloc to turn it off
2354          */
2355         set_opt(sbi->s_mount_opt, DELALLOC);
2356
2357
2358         if (!parse_options((char *) data, sb, &journal_devnum,
2359                            &journal_ioprio, NULL, 0))
2360                 goto failed_mount;
2361
2362         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2363                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2364
2365         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2366             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2367              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2368              EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2369                 printk(KERN_WARNING
2370                        "EXT4-fs warning: feature flags set on rev 0 fs, "
2371                        "running e2fsck is recommended\n");
2372
2373         /*
2374          * Check feature flags regardless of the revision level, since we
2375          * previously didn't change the revision level when setting the flags,
2376          * so there is a chance incompat flags are set on a rev 0 filesystem.
2377          */
2378         features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2379         if (features) {
2380                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2381                        "unsupported optional features (%x).\n", sb->s_id,
2382                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2383                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2384                 goto failed_mount;
2385         }
2386         features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2387         if (!(sb->s_flags & MS_RDONLY) && features) {
2388                 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2389                        "unsupported optional features (%x).\n", sb->s_id,
2390                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2391                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
2392                 goto failed_mount;
2393         }
2394         has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2395                                     EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2396         if (has_huge_files) {
2397                 /*
2398                  * Large file size enabled file system can only be
2399                  * mount if kernel is build with CONFIG_LBD
2400                  */
2401                 if (sizeof(root->i_blocks) < sizeof(u64) &&
2402                                 !(sb->s_flags & MS_RDONLY)) {
2403                         printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2404                                         "files cannot be mounted read-write "
2405                                         "without CONFIG_LBD.\n", sb->s_id);
2406                         goto failed_mount;
2407                 }
2408         }
2409         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2410
2411         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2412             blocksize > EXT4_MAX_BLOCK_SIZE) {
2413                 printk(KERN_ERR
2414                        "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2415                        blocksize, sb->s_id);
2416                 goto failed_mount;
2417         }
2418
2419         if (sb->s_blocksize != blocksize) {
2420
2421                 /* Validate the filesystem blocksize */
2422                 if (!sb_set_blocksize(sb, blocksize)) {
2423                         printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2424                                         blocksize);
2425                         goto failed_mount;
2426                 }
2427
2428                 brelse(bh);
2429                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2430                 offset = do_div(logical_sb_block, blocksize);
2431                 bh = sb_bread(sb, logical_sb_block);
2432                 if (!bh) {
2433                         printk(KERN_ERR
2434                                "EXT4-fs: Can't read superblock on 2nd try.\n");
2435                         goto failed_mount;
2436                 }
2437                 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2438                 sbi->s_es = es;
2439                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2440                         printk(KERN_ERR
2441                                "EXT4-fs: Magic mismatch, very weird !\n");
2442                         goto failed_mount;
2443                 }
2444         }
2445
2446         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2447                                                       has_huge_files);
2448         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2449
2450         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2451                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2452                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2453         } else {
2454                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2455                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2456                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2457                     (!is_power_of_2(sbi->s_inode_size)) ||
2458                     (sbi->s_inode_size > blocksize)) {
2459                         printk(KERN_ERR
2460                                "EXT4-fs: unsupported inode size: %d\n",
2461                                sbi->s_inode_size);
2462                         goto failed_mount;
2463                 }
2464                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2465                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2466         }
2467         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2468         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2469                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2470                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2471                     !is_power_of_2(sbi->s_desc_size)) {
2472                         printk(KERN_ERR
2473                                "EXT4-fs: unsupported descriptor size %lu\n",
2474                                sbi->s_desc_size);
2475                         goto failed_mount;
2476                 }
2477         } else
2478                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2479         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2480         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2481         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2482                 goto cantfind_ext4;
2483         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2484         if (sbi->s_inodes_per_block == 0)
2485                 goto cantfind_ext4;
2486         sbi->s_itb_per_group = sbi->s_inodes_per_group /
2487                                         sbi->s_inodes_per_block;
2488         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2489         sbi->s_sbh = bh;
2490         sbi->s_mount_state = le16_to_cpu(es->s_state);
2491         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2492         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2493         for (i = 0; i < 4; i++)
2494                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2495         sbi->s_def_hash_version = es->s_def_hash_version;
2496         i = le32_to_cpu(es->s_flags);
2497         if (i & EXT2_FLAGS_UNSIGNED_HASH)
2498                 sbi->s_hash_unsigned = 3;
2499         else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2500 #ifdef __CHAR_UNSIGNED__
2501                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2502                 sbi->s_hash_unsigned = 3;
2503 #else
2504                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2505 #endif
2506                 sb->s_dirt = 1;
2507         }
2508
2509         if (sbi->s_blocks_per_group > blocksize * 8) {
2510                 printk(KERN_ERR
2511                        "EXT4-fs: #blocks per group too big: %lu\n",
2512                        sbi->s_blocks_per_group);
2513                 goto failed_mount;
2514         }
2515         if (sbi->s_inodes_per_group > blocksize * 8) {
2516                 printk(KERN_ERR
2517                        "EXT4-fs: #inodes per group too big: %lu\n",
2518                        sbi->s_inodes_per_group);
2519                 goto failed_mount;
2520         }
2521
2522         if (ext4_blocks_count(es) >
2523                     (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2524                 printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2525                         " too large to mount safely\n", sb->s_id);
2526                 if (sizeof(sector_t) < 8)
2527                         printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2528                                         "enabled\n");
2529                 goto failed_mount;
2530         }
2531
2532         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2533                 goto cantfind_ext4;
2534
2535         /* check blocks count against device size */
2536         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2537         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2538                 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2539                        "exceeds size of device (%llu blocks)\n",
2540                        ext4_blocks_count(es), blocks_count);
2541                 goto failed_mount;
2542         }
2543
2544         /*
2545          * It makes no sense for the first data block to be beyond the end
2546          * of the filesystem.
2547          */
2548         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2549                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2550                        "block %u is beyond end of filesystem (%llu)\n",
2551                        le32_to_cpu(es->s_first_data_block),
2552                        ext4_blocks_count(es));
2553                 goto failed_mount;
2554         }
2555         blocks_count = (ext4_blocks_count(es) -
2556                         le32_to_cpu(es->s_first_data_block) +
2557                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
2558         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2559         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2560                 printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2561                        "(block count %llu, first data block %u, "
2562                        "blocks per group %lu)\n", sbi->s_groups_count,
2563                        ext4_blocks_count(es),
2564                        le32_to_cpu(es->s_first_data_block),
2565                        EXT4_BLOCKS_PER_GROUP(sb));
2566                 goto failed_mount;
2567         }
2568         sbi->s_groups_count = blocks_count;
2569         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2570                    EXT4_DESC_PER_BLOCK(sb);
2571         sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2572                                     GFP_KERNEL);
2573         if (sbi->s_group_desc == NULL) {
2574                 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2575                 goto failed_mount;
2576         }
2577
2578 #ifdef CONFIG_PROC_FS
2579         if (ext4_proc_root)
2580                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2581 #endif
2582
2583         bgl_lock_init(sbi->s_blockgroup_lock);
2584
2585         for (i = 0; i < db_count; i++) {
2586                 block = descriptor_loc(sb, logical_sb_block, i);
2587                 sbi->s_group_desc[i] = sb_bread(sb, block);
2588                 if (!sbi->s_group_desc[i]) {
2589                         printk(KERN_ERR "EXT4-fs: "
2590                                "can't read group descriptor %d\n", i);
2591                         db_count = i;
2592                         goto failed_mount2;
2593                 }
2594         }
2595         if (!ext4_check_descriptors(sb)) {
2596                 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2597                 goto failed_mount2;
2598         }
2599         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2600                 if (!ext4_fill_flex_info(sb)) {
2601                         printk(KERN_ERR
2602                                "EXT4-fs: unable to initialize "
2603                                "flex_bg meta info!\n");
2604                         goto failed_mount2;
2605                 }
2606
2607         sbi->s_gdb_count = db_count;
2608         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2609         spin_lock_init(&sbi->s_next_gen_lock);
2610
2611         err = percpu_counter_init(&sbi->s_freeblocks_counter,
2612                         ext4_count_free_blocks(sb));
2613         if (!err) {
2614                 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2615                                 ext4_count_free_inodes(sb));
2616         }
2617         if (!err) {
2618                 err = percpu_counter_init(&sbi->s_dirs_counter,
2619                                 ext4_count_dirs(sb));
2620         }
2621         if (!err) {
2622                 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2623         }
2624         if (err) {
2625                 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2626                 goto failed_mount3;
2627         }
2628
2629         sbi->s_stripe = ext4_get_stripe_size(sbi);
2630
2631         /*
2632          * set up enough so that it can read an inode
2633          */
2634         if (!test_opt(sb, NOLOAD) &&
2635             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2636                 sb->s_op = &ext4_sops;
2637         else
2638                 sb->s_op = &ext4_nojournal_sops;
2639         sb->s_export_op = &ext4_export_ops;
2640         sb->s_xattr = ext4_xattr_handlers;
2641 #ifdef CONFIG_QUOTA
2642         sb->s_qcop = &ext4_qctl_operations;
2643         sb->dq_op = &ext4_quota_operations;
2644 #endif
2645         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2646         mutex_init(&sbi->s_orphan_lock);
2647         mutex_init(&sbi->s_resize_lock);
2648
2649         sb->s_root = NULL;
2650
2651         needs_recovery = (es->s_last_orphan != 0 ||
2652                           EXT4_HAS_INCOMPAT_FEATURE(sb,
2653                                     EXT4_FEATURE_INCOMPAT_RECOVER));
2654
2655         /*
2656          * The first inode we look at is the journal inode.  Don't try
2657          * root first: it may be modified in the journal!
2658          */
2659         if (!test_opt(sb, NOLOAD) &&
2660             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2661                 if (ext4_load_journal(sb, es, journal_devnum))
2662                         goto failed_mount3;
2663                 if (!(sb->s_flags & MS_RDONLY) &&
2664                     EXT4_SB(sb)->s_journal->j_failed_commit) {
2665                         printk(KERN_CRIT "EXT4-fs error (device %s): "
2666                                "ext4_fill_super: Journal transaction "
2667                                "%u is corrupt\n", sb->s_id,
2668                                EXT4_SB(sb)->s_journal->j_failed_commit);
2669                         if (test_opt(sb, ERRORS_RO)) {
2670                                 printk(KERN_CRIT
2671                                        "Mounting filesystem read-only\n");
2672                                 sb->s_flags |= MS_RDONLY;
2673                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2674                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2675                         }
2676                         if (test_opt(sb, ERRORS_PANIC)) {
2677                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2678                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2679                                 ext4_commit_super(sb, 1);
2680                                 goto failed_mount4;
2681                         }
2682                 }
2683         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2684               EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2685                 printk(KERN_ERR "EXT4-fs: required journal recovery "
2686                        "suppressed and not mounted read-only\n");
2687                 goto failed_mount4;
2688         } else {
2689                 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2690                 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2691                 sbi->s_journal = NULL;
2692                 needs_recovery = 0;
2693                 goto no_journal;
2694         }
2695
2696         if (ext4_blocks_count(es) > 0xffffffffULL &&
2697             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2698                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
2699                 printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2700                 goto failed_mount4;
2701         }
2702
2703         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2704                 jbd2_journal_set_features(sbi->s_journal,
2705                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2706                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2707         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2708                 jbd2_journal_set_features(sbi->s_journal,
2709                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2710                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2711                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2712         } else {
2713                 jbd2_journal_clear_features(sbi->s_journal,
2714                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2715                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2716         }
2717
2718         /* We have now updated the journal if required, so we can
2719          * validate the data journaling mode. */
2720         switch (test_opt(sb, DATA_FLAGS)) {
2721         case 0:
2722                 /* No mode set, assume a default based on the journal
2723                  * capabilities: ORDERED_DATA if the journal can
2724                  * cope, else JOURNAL_DATA
2725                  */
2726                 if (jbd2_journal_check_available_features
2727                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2728                         set_opt(sbi->s_mount_opt, ORDERED_DATA);
2729                 else
2730                         set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2731                 break;
2732
2733         case EXT4_MOUNT_ORDERED_DATA:
2734         case EXT4_MOUNT_WRITEBACK_DATA:
2735                 if (!jbd2_journal_check_available_features
2736                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2737                         printk(KERN_ERR "EXT4-fs: Journal does not support "
2738                                "requested data journaling mode\n");
2739                         goto failed_mount4;
2740                 }
2741         default:
2742                 break;
2743         }
2744         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2745
2746 no_journal:
2747
2748         if (test_opt(sb, NOBH)) {
2749                 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2750                         printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2751                                 "its supported only with writeback mode\n");
2752                         clear_opt(sbi->s_mount_opt, NOBH);
2753                 }
2754         }
2755         /*
2756          * The jbd2_journal_load will have done any necessary log recovery,
2757          * so we can safely mount the rest of the filesystem now.
2758          */
2759
2760         root = ext4_iget(sb, EXT4_ROOT_INO);
2761         if (IS_ERR(root)) {
2762                 printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2763                 ret = PTR_ERR(root);
2764                 goto failed_mount4;
2765         }
2766         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2767                 iput(root);
2768                 printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2769                 goto failed_mount4;
2770         }
2771         sb->s_root = d_alloc_root(root);
2772         if (!sb->s_root) {
2773                 printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2774                 iput(root);
2775                 ret = -ENOMEM;
2776                 goto failed_mount4;
2777         }
2778
2779         ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2780
2781         /* determine the minimum size of new large inodes, if present */
2782         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2783                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2784                                                      EXT4_GOOD_OLD_INODE_SIZE;
2785                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2786                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2787                         if (sbi->s_want_extra_isize <
2788                             le16_to_cpu(es->s_want_extra_isize))
2789                                 sbi->s_want_extra_isize =
2790                                         le16_to_cpu(es->s_want_extra_isize);
2791                         if (sbi->s_want_extra_isize <
2792                             le16_to_cpu(es->s_min_extra_isize))
2793                                 sbi->s_want_extra_isize =
2794                                         le16_to_cpu(es->s_min_extra_isize);
2795                 }
2796         }
2797         /* Check if enough inode space is available */
2798         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2799                                                         sbi->s_inode_size) {
2800                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2801                                                        EXT4_GOOD_OLD_INODE_SIZE;
2802                 printk(KERN_INFO "EXT4-fs: required extra inode space not"
2803                         "available.\n");
2804         }
2805
2806         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2807                 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2808                                 "requested data journaling mode\n");
2809                 clear_opt(sbi->s_mount_opt, DELALLOC);
2810         } else if (test_opt(sb, DELALLOC))
2811                 printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2812
2813         ext4_ext_init(sb);
2814         err = ext4_mb_init(sb, needs_recovery);
2815         if (err) {
2816                 printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2817                        err);
2818                 goto failed_mount4;
2819         }
2820
2821         sbi->s_kobj.kset = ext4_kset;
2822         init_completion(&sbi->s_kobj_unregister);
2823         err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2824                                    "%s", sb->s_id);
2825         if (err) {
2826                 ext4_mb_release(sb);
2827                 ext4_ext_release(sb);
2828                 goto failed_mount4;
2829         };
2830
2831         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2832         ext4_orphan_cleanup(sb, es);
2833         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2834         if (needs_recovery) {
2835                 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2836                 ext4_mark_recovery_complete(sb, es);
2837         }
2838         if (EXT4_SB(sb)->s_journal) {
2839                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2840                         descr = " journalled data mode";
2841                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2842                         descr = " ordered data mode";
2843                 else
2844                         descr = " writeback data mode";
2845         } else
2846                 descr = "out journal";
2847
2848         printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2849                sb->s_id, descr);
2850
2851         lock_kernel();
2852         return 0;
2853
2854 cantfind_ext4:
2855         if (!silent)
2856                 printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2857                        sb->s_id);
2858         goto failed_mount;
2859
2860 failed_mount4:
2861         printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2862         if (sbi->s_journal) {
2863                 jbd2_journal_destroy(sbi->s_journal);
2864                 sbi->s_journal = NULL;
2865         }
2866 failed_mount3:
2867         if (sbi->s_flex_groups) {
2868                 if (is_vmalloc_addr(sbi->s_flex_groups))
2869                         vfree(sbi->s_flex_groups);
2870                 else
2871                         kfree(sbi->s_flex_groups);
2872         }
2873         percpu_counter_destroy(&sbi->s_freeblocks_counter);
2874         percpu_counter_destroy(&sbi->s_freeinodes_counter);
2875         percpu_counter_destroy(&sbi->s_dirs_counter);
2876         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2877 failed_mount2:
2878         for (i = 0; i < db_count; i++)
2879                 brelse(sbi->s_group_desc[i]);
2880         kfree(sbi->s_group_desc);
2881 failed_mount:
2882         if (sbi->s_proc) {
2883                 remove_proc_entry(sb->s_id, ext4_proc_root);
2884         }
2885 #ifdef CONFIG_QUOTA
2886         for (i = 0; i < MAXQUOTAS; i++)
2887                 kfree(sbi->s_qf_names[i]);
2888 #endif
2889         ext4_blkdev_remove(sbi);
2890         brelse(bh);
2891 out_fail:
2892         sb->s_fs_info = NULL;
2893         kfree(sbi);
2894         lock_kernel();
2895         return ret;
2896 }
2897
2898 /*
2899  * Setup any per-fs journal parameters now.  We'll do this both on
2900  * initial mount, once the journal has been initialised but before we've
2901  * done any recovery; and again on any subsequent remount.
2902  */
2903 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2904 {
2905         struct ext4_sb_info *sbi = EXT4_SB(sb);
2906
2907         journal->j_commit_interval = sbi->s_commit_interval;
2908         journal->j_min_batch_time = sbi->s_min_batch_time;
2909         journal->j_max_batch_time = sbi->s_max_batch_time;
2910
2911         spin_lock(&journal->j_state_lock);
2912         if (test_opt(sb, BARRIER))
2913                 journal->j_flags |= JBD2_BARRIER;
2914         else
2915                 journal->j_flags &= ~JBD2_BARRIER;
2916         if (test_opt(sb, DATA_ERR_ABORT))
2917                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2918         else
2919                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2920         spin_unlock(&journal->j_state_lock);
2921 }
2922
2923 static journal_t *ext4_get_journal(struct super_block *sb,
2924                                    unsigned int journal_inum)
2925 {
2926         struct inode *journal_inode;
2927         journal_t *journal;
2928
2929         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2930
2931         /* First, test for the existence of a valid inode on disk.  Bad
2932          * things happen if we iget() an unused inode, as the subsequent
2933          * iput() will try to delete it. */
2934
2935         journal_inode = ext4_iget(sb, journal_inum);
2936         if (IS_ERR(journal_inode)) {
2937                 printk(KERN_ERR "EXT4-fs: no journal found.\n");
2938                 return NULL;
2939         }
2940         if (!journal_inode->i_nlink) {
2941                 make_bad_inode(journal_inode);
2942                 iput(journal_inode);
2943                 printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2944                 return NULL;
2945         }
2946
2947         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2948                   journal_inode, journal_inode->i_size);
2949         if (!S_ISREG(journal_inode->i_mode)) {
2950                 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2951                 iput(journal_inode);
2952                 return NULL;
2953         }
2954
2955         journal = jbd2_journal_init_inode(journal_inode);
2956         if (!journal) {
2957                 printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2958                 iput(journal_inode);
2959                 return NULL;
2960         }
2961         journal->j_private = sb;
2962         ext4_init_journal_params(sb, journal);
2963         return journal;
2964 }
2965
2966 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2967                                        dev_t j_dev)
2968 {
2969         struct buffer_head *bh;
2970         journal_t *journal;
2971         ext4_fsblk_t start;
2972         ext4_fsblk_t len;
2973         int hblock, blocksize;
2974         ext4_fsblk_t sb_block;
2975         unsigned long offset;
2976         struct ext4_super_block *es;
2977         struct block_device *bdev;
2978
2979         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2980
2981         bdev = ext4_blkdev_get(j_dev);
2982         if (bdev == NULL)
2983                 return NULL;
2984
2985         if (bd_claim(bdev, sb)) {
2986                 printk(KERN_ERR
2987                         "EXT4-fs: failed to claim external journal device.\n");
2988                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2989                 return NULL;
2990         }
2991
2992         blocksize = sb->s_blocksize;
2993         hblock = bdev_hardsect_size(bdev);
2994         if (blocksize < hblock) {
2995                 printk(KERN_ERR
2996                         "EXT4-fs: blocksize too small for journal device.\n");
2997                 goto out_bdev;
2998         }
2999
3000         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3001         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3002         set_blocksize(bdev, blocksize);
3003         if (!(bh = __bread(bdev, sb_block, blocksize))) {
3004                 printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
3005                        "external journal\n");
3006                 goto out_bdev;
3007         }
3008
3009         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3010         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3011             !(le32_to_cpu(es->s_feature_incompat) &
3012               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3013                 printk(KERN_ERR "EXT4-fs: external journal has "
3014                                         "bad superblock\n");
3015                 brelse(bh);
3016                 goto out_bdev;
3017         }
3018
3019         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3020                 printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
3021                 brelse(bh);
3022                 goto out_bdev;
3023         }
3024
3025         len = ext4_blocks_count(es);
3026         start = sb_block + 1;
3027         brelse(bh);     /* we're done with the superblock */
3028
3029         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3030                                         start, len, blocksize);
3031         if (!journal) {
3032                 printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
3033                 goto out_bdev;
3034         }
3035         journal->j_private = sb;
3036         ll_rw_block(READ, 1, &journal->j_sb_buffer);
3037         wait_on_buffer(journal->j_sb_buffer);
3038         if (!buffer_uptodate(journal->j_sb_buffer)) {
3039                 printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
3040                 goto out_journal;
3041         }
3042         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3043                 printk(KERN_ERR "EXT4-fs: External journal has more than one "
3044                                         "user (unsupported) - %d\n",
3045                         be32_to_cpu(journal->j_superblock->s_nr_users));
3046                 goto out_journal;
3047         }
3048         EXT4_SB(sb)->journal_bdev = bdev;
3049         ext4_init_journal_params(sb, journal);
3050         return journal;
3051 out_journal:
3052         jbd2_journal_destroy(journal);
3053 out_bdev:
3054         ext4_blkdev_put(bdev);
3055         return NULL;
3056 }
3057
3058 static int ext4_load_journal(struct super_block *sb,
3059                              struct ext4_super_block *es,
3060                              unsigned long journal_devnum)
3061 {
3062         journal_t *journal;
3063         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3064         dev_t journal_dev;
3065         int err = 0;
3066         int really_read_only;
3067
3068         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3069
3070         if (journal_devnum &&
3071             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3072                 printk(KERN_INFO "EXT4-fs: external journal device major/minor "
3073                         "numbers have changed\n");
3074                 journal_dev = new_decode_dev(journal_devnum);
3075         } else
3076                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3077
3078         really_read_only = bdev_read_only(sb->s_bdev);
3079
3080         /*
3081          * Are we loading a blank journal or performing recovery after a
3082          * crash?  For recovery, we need to check in advance whether we
3083          * can get read-write access to the device.
3084          */
3085
3086         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3087                 if (sb->s_flags & MS_RDONLY) {
3088                         printk(KERN_INFO "EXT4-fs: INFO: recovery "
3089                                         "required on readonly filesystem.\n");
3090                         if (really_read_only) {
3091                                 printk(KERN_ERR "EXT4-fs: write access "
3092                                         "unavailable, cannot proceed.\n");
3093                                 return -EROFS;
3094                         }
3095                         printk(KERN_INFO "EXT4-fs: write access will "
3096                                "be enabled during recovery.\n");
3097                 }
3098         }
3099
3100         if (journal_inum && journal_dev) {
3101                 printk(KERN_ERR "EXT4-fs: filesystem has both journal "
3102                        "and inode journals!\n");
3103                 return -EINVAL;
3104         }
3105
3106         if (journal_inum) {
3107                 if (!(journal = ext4_get_journal(sb, journal_inum)))
3108                         return -EINVAL;
3109         } else {
3110                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3111                         return -EINVAL;
3112         }
3113
3114         if (journal->j_flags & JBD2_BARRIER)
3115                 printk(KERN_INFO "EXT4-fs: barriers enabled\n");
3116         else
3117                 printk(KERN_INFO "EXT4-fs: barriers disabled\n");
3118
3119         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3120                 err = jbd2_journal_update_format(journal);
3121                 if (err)  {
3122                         printk(KERN_ERR "EXT4-fs: error updating journal.\n");
3123                         jbd2_journal_destroy(journal);
3124                         return err;
3125                 }
3126         }
3127
3128         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3129                 err = jbd2_journal_wipe(journal, !really_read_only);
3130         if (!err)
3131                 err = jbd2_journal_load(journal);
3132
3133         if (err) {
3134                 printk(KERN_ERR "EXT4-fs: error loading journal.\n");
3135                 jbd2_journal_destroy(journal);
3136                 return err;
3137         }
3138
3139         EXT4_SB(sb)->s_journal = journal;
3140         ext4_clear_journal_err(sb, es);
3141
3142         if (journal_devnum &&
3143             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3144                 es->s_journal_dev = cpu_to_le32(journal_devnum);
3145
3146                 /* Make sure we flush the recovery flag to disk. */
3147                 ext4_commit_super(sb, 1);
3148         }
3149
3150         return 0;
3151 }
3152
3153 static int ext4_commit_super(struct super_block *sb, int sync)
3154 {
3155         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3156         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3157         int error = 0;
3158
3159         if (!sbh)
3160                 return error;
3161         if (buffer_write_io_error(sbh)) {
3162                 /*
3163                  * Oh, dear.  A previous attempt to write the
3164                  * superblock failed.  This could happen because the
3165                  * USB device was yanked out.  Or it could happen to
3166                  * be a transient write error and maybe the block will
3167                  * be remapped.  Nothing we can do but to retry the
3168                  * write and hope for the best.
3169                  */
3170                 printk(KERN_ERR "EXT4-fs: previous I/O error to "
3171                        "superblock detected for %s.\n", sb->s_id);
3172                 clear_buffer_write_io_error(sbh);
3173                 set_buffer_uptodate(sbh);
3174         }
3175         es->s_wtime = cpu_to_le32(get_seconds());
3176         es->s_kbytes_written =
3177                 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3178                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3179                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
3180         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3181                                         &EXT4_SB(sb)->s_freeblocks_counter));
3182         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3183                                         &EXT4_SB(sb)->s_freeinodes_counter));
3184         sb->s_dirt = 0;
3185         BUFFER_TRACE(sbh, "marking dirty");
3186         mark_buffer_dirty(sbh);
3187         if (sync) {
3188                 error = sync_dirty_buffer(sbh);
3189                 if (error)
3190                         return error;
3191
3192                 error = buffer_write_io_error(sbh);
3193                 if (error) {
3194                         printk(KERN_ERR "EXT4-fs: I/O error while writing "
3195                                "superblock for %s.\n", sb->s_id);
3196                         clear_buffer_write_io_error(sbh);
3197                         set_buffer_uptodate(sbh);
3198                 }
3199         }
3200         return error;
3201 }
3202
3203
3204 /*
3205  * Have we just finished recovery?  If so, and if we are mounting (or
3206  * remounting) the filesystem readonly, then we will end up with a
3207  * consistent fs on disk.  Record that fact.
3208  */
3209 static void ext4_mark_recovery_complete(struct super_block *sb,
3210                                         struct ext4_super_block *es)
3211 {
3212         journal_t *journal = EXT4_SB(sb)->s_journal;
3213
3214         if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3215                 BUG_ON(journal != NULL);
3216                 return;
3217         }
3218         jbd2_journal_lock_updates(journal);
3219         if (jbd2_journal_flush(journal) < 0)
3220                 goto out;
3221
3222         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3223             sb->s_flags & MS_RDONLY) {
3224                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3225                 ext4_commit_super(sb, 1);
3226         }
3227
3228 out:
3229         jbd2_journal_unlock_updates(journal);
3230 }
3231
3232 /*
3233  * If we are mounting (or read-write remounting) a filesystem whose journal
3234  * has recorded an error from a previous lifetime, move that error to the
3235  * main filesystem now.
3236  */
3237 static void ext4_clear_journal_err(struct super_block *sb,
3238                                    struct ext4_super_block *es)
3239 {
3240         journal_t *journal;
3241         int j_errno;
3242         const char *errstr;
3243
3244         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3245
3246         journal = EXT4_SB(sb)->s_journal;
3247
3248         /*
3249          * Now check for any error status which may have been recorded in the
3250          * journal by a prior ext4_error() or ext4_abort()
3251          */
3252
3253         j_errno = jbd2_journal_errno(journal);
3254         if (j_errno) {
3255                 char nbuf[16];
3256
3257                 errstr = ext4_decode_error(sb, j_errno, nbuf);
3258                 ext4_warning(sb, __func__, "Filesystem error recorded "
3259                              "from previous mount: %s", errstr);
3260                 ext4_warning(sb, __func__, "Marking fs in need of "
3261                              "filesystem check.");
3262
3263                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3264                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3265                 ext4_commit_super(sb, 1);
3266
3267                 jbd2_journal_clear_err(journal);
3268         }
3269 }
3270
3271 /*
3272  * Force the running and committing transactions to commit,
3273  * and wait on the commit.
3274  */
3275 int ext4_force_commit(struct super_block *sb)
3276 {
3277         journal_t *journal;
3278         int ret = 0;
3279
3280         if (sb->s_flags & MS_RDONLY)
3281                 return 0;
3282
3283         journal = EXT4_SB(sb)->s_journal;
3284         if (journal)
3285                 ret = ext4_journal_force_commit(journal);
3286
3287         return ret;
3288 }
3289
/* Write the superblock back synchronously; the result is not reported. */
static void ext4_write_super(struct super_block *sb)
{
	const int sync = 1;

	(void) ext4_commit_super(sb, sync);
}
3294
3295 static int ext4_sync_fs(struct super_block *sb, int wait)
3296 {
3297         int ret = 0;
3298         tid_t target;
3299
3300         trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3301         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3302                 if (wait)
3303                         jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3304         }
3305         return ret;
3306 }
3307
3308 /*
3309  * LVM calls this function before a (read-only) snapshot is created.  This
3310  * gives us a chance to flush the journal completely and mark the fs clean.
3311  */
3312 static int ext4_freeze(struct super_block *sb)
3313 {
3314         int error = 0;
3315         journal_t *journal;
3316
3317         if (sb->s_flags & MS_RDONLY)
3318                 return 0;
3319
3320         journal = EXT4_SB(sb)->s_journal;
3321
3322         /* Now we set up the journal barrier. */
3323         jbd2_journal_lock_updates(journal);
3324
3325         /*
3326          * Don't clear the needs_recovery flag if we failed to flush
3327          * the journal.
3328          */
3329         error = jbd2_journal_flush(journal);
3330         if (error < 0) {
3331         out:
3332                 jbd2_journal_unlock_updates(journal);
3333                 return error;
3334         }
3335
3336         /* Journal blocked and flushed, clear needs_recovery flag. */
3337         EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3338         error = ext4_commit_super(sb, 1);
3339         if (error)
3340                 goto out;
3341         return 0;
3342 }
3343
3344 /*
3345  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3346  * flag here, even though the filesystem is not technically dirty yet.
3347  */
3348 static int ext4_unfreeze(struct super_block *sb)
3349 {
3350         if (sb->s_flags & MS_RDONLY)
3351                 return 0;
3352
3353         lock_super(sb);
3354         /* Reset the needs_recovery flag before the fs is unlocked. */
3355         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3356         ext4_commit_super(sb, 1);
3357         unlock_super(sb);
3358         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3359         return 0;
3360 }
3361
3362 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3363 {
3364         struct ext4_super_block *es;
3365         struct ext4_sb_info *sbi = EXT4_SB(sb);
3366         ext4_fsblk_t n_blocks_count = 0;
3367         unsigned long old_sb_flags;
3368         struct ext4_mount_options old_opts;
3369         ext4_group_t g;
3370         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3371         int err;
3372 #ifdef CONFIG_QUOTA
3373         int i;
3374 #endif
3375
3376         /* Store the original options */
3377         old_sb_flags = sb->s_flags;
3378         old_opts.s_mount_opt = sbi->s_mount_opt;
3379         old_opts.s_resuid = sbi->s_resuid;
3380         old_opts.s_resgid = sbi->s_resgid;
3381         old_opts.s_commit_interval = sbi->s_commit_interval;
3382         old_opts.s_min_batch_time = sbi->s_min_batch_time;
3383         old_opts.s_max_batch_time = sbi->s_max_batch_time;
3384 #ifdef CONFIG_QUOTA
3385         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3386         for (i = 0; i < MAXQUOTAS; i++)
3387                 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3388 #endif
3389         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3390                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3391
3392         /*
3393          * Allow the "check" option to be passed as a remount option.
3394          */
3395         if (!parse_options(data, sb, NULL, &journal_ioprio,
3396                            &n_blocks_count, 1)) {
3397                 err = -EINVAL;
3398                 goto restore_opts;
3399         }
3400
3401         if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3402                 ext4_abort(sb, __func__, "Abort forced by user");
3403
3404         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3405                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3406
3407         es = sbi->s_es;
3408
3409         if (sbi->s_journal) {
3410                 ext4_init_journal_params(sb, sbi->s_journal);
3411                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3412         }
3413
3414         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3415                 n_blocks_count > ext4_blocks_count(es)) {
3416                 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3417                         err = -EROFS;
3418                         goto restore_opts;
3419                 }
3420
3421                 if (*flags & MS_RDONLY) {
3422                         /*
3423                          * First of all, the unconditional stuff we have to do
3424                          * to disable replay of the journal when we next remount
3425                          */
3426                         sb->s_flags |= MS_RDONLY;
3427
3428                         /*
3429                          * OK, test if we are remounting a valid rw partition
3430                          * readonly, and if so set the rdonly flag and then
3431                          * mark the partition as valid again.
3432                          */
3433                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3434                             (sbi->s_mount_state & EXT4_VALID_FS))
3435                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
3436
3437                         if (sbi->s_journal)
3438                                 ext4_mark_recovery_complete(sb, es);
3439                 } else {
3440                         int ret;
3441                         if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3442                                         ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3443                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3444                                        "remount RDWR because of unsupported "
3445                                        "optional features (%x).\n", sb->s_id,
3446                                 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3447                                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
3448                                 err = -EROFS;
3449                                 goto restore_opts;
3450                         }
3451
3452                         /*
3453                          * Make sure the group descriptor checksums
3454                          * are sane.  If they aren't, refuse to
3455                          * remount r/w.
3456                          */
3457                         for (g = 0; g < sbi->s_groups_count; g++) {
3458                                 struct ext4_group_desc *gdp =
3459                                         ext4_get_group_desc(sb, g, NULL);
3460
3461                                 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3462                                         printk(KERN_ERR
3463                "EXT4-fs: ext4_remount: "
3464                 "Checksum for group %u failed (%u!=%u)\n",
3465                 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3466                                                le16_to_cpu(gdp->bg_checksum));
3467                                         err = -EINVAL;
3468                                         goto restore_opts;
3469                                 }
3470                         }
3471
3472                         /*
3473                          * If we have an unprocessed orphan list hanging
3474                          * around from a previously readonly bdev mount,
3475                          * require a full umount/remount for now.
3476                          */
3477                         if (es->s_last_orphan) {
3478                                 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3479                                        "remount RDWR because of unprocessed "
3480                                        "orphan inode list.  Please "
3481                                        "umount/remount instead.\n",
3482                                        sb->s_id);
3483                                 err = -EINVAL;
3484                                 goto restore_opts;
3485                         }
3486
3487                         /*
3488                          * Mounting a RDONLY partition read-write, so reread
3489                          * and store the current valid flag.  (It may have
3490                          * been changed by e2fsck since we originally mounted
3491                          * the partition.)
3492                          */
3493                         if (sbi->s_journal)
3494                                 ext4_clear_journal_err(sb, es);
3495                         sbi->s_mount_state = le16_to_cpu(es->s_state);
3496                         if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3497                                 goto restore_opts;
3498                         if (!ext4_setup_super(sb, es, 0))
3499                                 sb->s_flags &= ~MS_RDONLY;
3500                 }
3501         }
3502         if (sbi->s_journal == NULL)
3503                 ext4_commit_super(sb, 1);
3504
3505 #ifdef CONFIG_QUOTA
3506         /* Release old quota file names */
3507         for (i = 0; i < MAXQUOTAS; i++)
3508                 if (old_opts.s_qf_names[i] &&
3509                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3510                         kfree(old_opts.s_qf_names[i]);
3511 #endif
3512         return 0;
3513 restore_opts:
3514         sb->s_flags = old_sb_flags;
3515         sbi->s_mount_opt = old_opts.s_mount_opt;
3516         sbi->s_resuid = old_opts.s_resuid;
3517         sbi->s_resgid = old_opts.s_resgid;
3518         sbi->s_commit_interval = old_opts.s_commit_interval;
3519         sbi->s_min_batch_time = old_opts.s_min_batch_time;
3520         sbi->s_max_batch_time = old_opts.s_max_batch_time;
3521 #ifdef CONFIG_QUOTA
3522         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3523         for (i = 0; i < MAXQUOTAS; i++) {
3524                 if (sbi->s_qf_names[i] &&
3525                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3526                         kfree(sbi->s_qf_names[i]);
3527                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3528         }
3529 #endif
3530         return err;
3531 }
3532
3533 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3534 {
3535         struct super_block *sb = dentry->d_sb;
3536         struct ext4_sb_info *sbi = EXT4_SB(sb);
3537         struct ext4_super_block *es = sbi->s_es;
3538         u64 fsid;
3539
3540         if (test_opt(sb, MINIX_DF)) {
3541                 sbi->s_overhead_last = 0;
3542         } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3543                 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3544                 ext4_fsblk_t overhead = 0;
3545
3546                 /*
3547                  * Compute the overhead (FS structures).  This is constant
3548                  * for a given filesystem unless the number of block groups
3549                  * changes so we cache the previous value until it does.
3550                  */
3551
3552                 /*
3553                  * All of the blocks before first_data_block are
3554                  * overhead
3555                  */
3556                 overhead = le32_to_cpu(es->s_first_data_block);
3557
3558                 /*
3559                  * Add the overhead attributed to the superblock and
3560                  * block group descriptors.  If the sparse superblocks
3561                  * feature is turned on, then not all groups have this.
3562                  */
3563                 for (i = 0; i < ngroups; i++) {
3564                         overhead += ext4_bg_has_super(sb, i) +
3565                                 ext4_bg_num_gdb(sb, i);
3566                         cond_resched();
3567                 }
3568
3569                 /*
3570                  * Every block group has an inode bitmap, a block
3571                  * bitmap, and an inode table.
3572                  */
3573                 overhead += ngroups * (2 + sbi->s_itb_per_group);
3574                 sbi->s_overhead_last = overhead;
3575                 smp_wmb();
3576                 sbi->s_blocks_last = ext4_blocks_count(es);
3577         }
3578
3579         buf->f_type = EXT4_SUPER_MAGIC;
3580         buf->f_bsize = sb->s_blocksize;
3581         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3582         buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3583                        percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3584         ext4_free_blocks_count_set(es, buf->f_bfree);
3585         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3586         if (buf->f_bfree < ext4_r_blocks_count(es))
3587                 buf->f_bavail = 0;
3588         buf->f_files = le32_to_cpu(es->s_inodes_count);
3589         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3590         es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3591         buf->f_namelen = EXT4_NAME_LEN;
3592         fsid = le64_to_cpup((void *)es->s_uuid) ^
3593                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3594         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3595         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3596         return 0;
3597 }
3598
/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
3601  * Process 1                         Process 2
3602  * ext4_create()                     quota_sync()
3603  *   jbd2_journal_start()                  write_dquot()
3604  *   vfs_dq_init()                         down(dqio_mutex)
3605  *     down(dqio_mutex)                    jbd2_journal_start()
3606  *
3607  */
3608
3609 #ifdef CONFIG_QUOTA
3610
3611 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3612 {
3613         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3614 }
3615
3616 static int ext4_write_dquot(struct dquot *dquot)
3617 {
3618         int ret, err;
3619         handle_t *handle;
3620         struct inode *inode;
3621
3622         inode = dquot_to_inode(dquot);
3623         handle = ext4_journal_start(inode,
3624                                         EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3625         if (IS_ERR(handle))
3626                 return PTR_ERR(handle);
3627         ret = dquot_commit(dquot);
3628         err = ext4_journal_stop(handle);
3629         if (!ret)
3630                 ret = err;
3631         return ret;
3632 }
3633
3634 static int ext4_acquire_dquot(struct dquot *dquot)
3635 {
3636         int ret, err;
3637         handle_t *handle;
3638
3639         handle = ext4_journal_start(dquot_to_inode(dquot),
3640                                         EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3641         if (IS_ERR(handle))
3642                 return PTR_ERR(handle);
3643         ret = dquot_acquire(dquot);
3644         err = ext4_journal_stop(handle);
3645         if (!ret)
3646                 ret = err;
3647         return ret;
3648 }
3649
3650 static int ext4_release_dquot(struct dquot *dquot)
3651 {
3652         int ret, err;
3653         handle_t *handle;
3654
3655         handle = ext4_journal_start(dquot_to_inode(dquot),
3656                                         EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3657         if (IS_ERR(handle)) {
3658                 /* Release dquot anyway to avoid endless cycle in dqput() */
3659                 dquot_release(dquot);
3660                 return PTR_ERR(handle);
3661         }
3662         ret = dquot_release(dquot);
3663         err = ext4_journal_stop(handle);
3664         if (!ret)
3665                 ret = err;
3666         return ret;
3667 }
3668
3669 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3670 {
3671         /* Are we journaling quotas? */
3672         if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3673             EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3674                 dquot_mark_dquot_dirty(dquot);
3675                 return ext4_write_dquot(dquot);
3676         } else {
3677                 return dquot_mark_dquot_dirty(dquot);
3678         }
3679 }
3680
3681 static int ext4_write_info(struct super_block *sb, int type)
3682 {
3683         int ret, err;
3684         handle_t *handle;
3685
3686         /* Data block + inode block */
3687         handle = ext4_journal_start(sb->s_root->d_inode, 2);
3688         if (IS_ERR(handle))
3689                 return PTR_ERR(handle);
3690         ret = dquot_commit_info(sb, type);
3691         err = ext4_journal_stop(handle);
3692         if (!ret)
3693                 ret = err;
3694         return ret;
3695 }
3696
3697 /*
3698  * Turn on quotas during mount time - we need to find
3699  * the quota file and such...
3700  */
3701 static int ext4_quota_on_mount(struct super_block *sb, int type)
3702 {
3703         return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3704                         EXT4_SB(sb)->s_jquota_fmt, type);
3705 }
3706
3707 /*
3708  * Standard function to be called on quota_on
3709  */
3710 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3711                          char *name, int remount)
3712 {
3713         int err;
3714         struct path path;
3715
3716         if (!test_opt(sb, QUOTA))
3717                 return -EINVAL;
3718         /* When remounting, no checks are needed and in fact, name is NULL */
3719         if (remount)
3720                 return vfs_quota_on(sb, type, format_id, name, remount);
3721
3722         err = kern_path(name, LOOKUP_FOLLOW, &path);
3723         if (err)
3724                 return err;
3725
3726         /* Quotafile not on the same filesystem? */
3727         if (path.mnt->mnt_sb != sb) {
3728                 path_put(&path);
3729                 return -EXDEV;
3730         }
3731         /* Journaling quota? */
3732         if (EXT4_SB(sb)->s_qf_names[type]) {
3733                 /* Quotafile not in fs root? */
3734                 if (path.dentry->d_parent != sb->s_root)
3735                         printk(KERN_WARNING
3736                                 "EXT4-fs: Quota file not on filesystem root. "
3737                                 "Journaled quota will not work.\n");
3738         }
3739
3740         /*
3741          * When we journal data on quota file, we have to flush journal to see
3742          * all updates to the file when we bypass pagecache...
3743          */
3744         if (EXT4_SB(sb)->s_journal &&
3745             ext4_should_journal_data(path.dentry->d_inode)) {
3746                 /*
3747                  * We don't need to lock updates but journal_flush() could
3748                  * otherwise be livelocked...
3749                  */
3750                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3751                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3752                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3753                 if (err) {
3754                         path_put(&path);
3755                         return err;
3756                 }
3757         }
3758
3759         err = vfs_quota_on_path(sb, type, format_id, &path);
3760         path_put(&path);
3761         return err;
3762 }
3763
3764 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3765  * acquiring the locks... As quota files are never truncated and quota code
3766  * itself serializes the operations (and noone else should touch the files)
3767  * we don't have to be afraid of races */
3768 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3769                                size_t len, loff_t off)
3770 {
3771         struct inode *inode = sb_dqopt(sb)->files[type];
3772         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3773         int err = 0;
3774         int offset = off & (sb->s_blocksize - 1);
3775         int tocopy;
3776         size_t toread;
3777         struct buffer_head *bh;
3778         loff_t i_size = i_size_read(inode);
3779
3780         if (off > i_size)
3781                 return 0;
3782         if (off+len > i_size)
3783                 len = i_size-off;
3784         toread = len;
3785         while (toread > 0) {
3786                 tocopy = sb->s_blocksize - offset < toread ?
3787                                 sb->s_blocksize - offset : toread;
3788                 bh = ext4_bread(NULL, inode, blk, 0, &err);
3789                 if (err)
3790                         return err;
3791                 if (!bh)        /* A hole? */
3792                         memset(data, 0, tocopy);
3793                 else
3794                         memcpy(data, bh->b_data+offset, tocopy);
3795                 brelse(bh);
3796                 offset = 0;
3797                 toread -= tocopy;
3798                 data += tocopy;
3799                 blk++;
3800         }
3801         return len;
3802 }
3803
3804 /* Write to quotafile (we know the transaction is already started and has
3805  * enough credits) */
3806 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3807                                 const char *data, size_t len, loff_t off)
3808 {
3809         struct inode *inode = sb_dqopt(sb)->files[type];
3810         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3811         int err = 0;
3812         int offset = off & (sb->s_blocksize - 1);
3813         int tocopy;
3814         int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3815         size_t towrite = len;
3816         struct buffer_head *bh;
3817         handle_t *handle = journal_current_handle();
3818
3819         if (EXT4_SB(sb)->s_journal && !handle) {
3820                 printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3821                         " cancelled because transaction is not started.\n",
3822                         (unsigned long long)off, (unsigned long long)len);
3823                 return -EIO;
3824         }
3825         mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3826         while (towrite > 0) {
3827                 tocopy = sb->s_blocksize - offset < towrite ?
3828                                 sb->s_blocksize - offset : towrite;
3829                 bh = ext4_bread(handle, inode, blk, 1, &err);
3830                 if (!bh)
3831                         goto out;
3832                 if (journal_quota) {
3833                         err = ext4_journal_get_write_access(handle, bh);
3834                         if (err) {
3835                                 brelse(bh);
3836                                 goto out;
3837                         }
3838                 }
3839                 lock_buffer(bh);
3840                 memcpy(bh->b_data+offset, data, tocopy);
3841                 flush_dcache_page(bh->b_page);
3842                 unlock_buffer(bh);
3843                 if (journal_quota)
3844                         err = ext4_handle_dirty_metadata(handle, NULL, bh);
3845                 else {
3846                         /* Always do at least ordered writes for quotas */
3847                         err = ext4_jbd2_file_inode(handle, inode);
3848                         mark_buffer_dirty(bh);
3849                 }
3850                 brelse(bh);
3851                 if (err)
3852                         goto out;
3853                 offset = 0;
3854                 towrite -= tocopy;
3855                 data += tocopy;
3856                 blk++;
3857         }
3858 out:
3859         if (len == towrite) {
3860                 mutex_unlock(&inode->i_mutex);
3861                 return err;
3862         }
3863         if (inode->i_size < off+len-towrite) {
3864                 i_size_write(inode, off+len-towrite);
3865                 EXT4_I(inode)->i_disksize = inode->i_size;
3866         }
3867         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3868         ext4_mark_inode_dirty(handle, inode);
3869         mutex_unlock(&inode->i_mutex);
3870         return len - towrite;
3871 }
3872
3873 #endif
3874
3875 static int ext4_get_sb(struct file_system_type *fs_type,
3876         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3877 {
3878         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3879 }
3880
3881 static struct file_system_type ext4_fs_type = {
3882         .owner          = THIS_MODULE,
3883         .name           = "ext4",
3884         .get_sb         = ext4_get_sb,
3885         .kill_sb        = kill_block_super,
3886         .fs_flags       = FS_REQUIRES_DEV,
3887 };
3888
3889 #ifdef CONFIG_EXT4DEV_COMPAT
3890 static int ext4dev_get_sb(struct file_system_type *fs_type,
3891         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3892 {
3893         printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3894                "to mount using ext4\n");
3895         printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3896                "will go away by 2.6.31\n");
3897         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3898 }
3899
3900 static struct file_system_type ext4dev_fs_type = {
3901         .owner          = THIS_MODULE,
3902         .name           = "ext4dev",
3903         .get_sb         = ext4dev_get_sb,
3904         .kill_sb        = kill_block_super,
3905         .fs_flags       = FS_REQUIRES_DEV,
3906 };
3907 MODULE_ALIAS("ext4dev");
3908 #endif
3909
3910 static int __init init_ext4_fs(void)
3911 {
3912         int err;
3913
3914         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3915         if (!ext4_kset)
3916                 return -ENOMEM;
3917         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3918         err = init_ext4_mballoc();
3919         if (err)
3920                 return err;
3921
3922         err = init_ext4_xattr();
3923         if (err)
3924                 goto out2;
3925         err = init_inodecache();
3926         if (err)
3927                 goto out1;
3928         err = register_filesystem(&ext4_fs_type);
3929         if (err)
3930                 goto out;
3931 #ifdef CONFIG_EXT4DEV_COMPAT
3932         err = register_filesystem(&ext4dev_fs_type);
3933         if (err) {
3934                 unregister_filesystem(&ext4_fs_type);
3935                 goto out;
3936         }
3937 #endif
3938         return 0;
3939 out:
3940         destroy_inodecache();
3941 out1:
3942         exit_ext4_xattr();
3943 out2:
3944         exit_ext4_mballoc();
3945         return err;
3946 }
3947
3948 static void __exit exit_ext4_fs(void)
3949 {
3950         unregister_filesystem(&ext4_fs_type);
3951 #ifdef CONFIG_EXT4DEV_COMPAT
3952         unregister_filesystem(&ext4dev_fs_type);
3953 #endif
3954         destroy_inodecache();
3955         exit_ext4_xattr();
3956         exit_ext4_mballoc();
3957         remove_proc_entry("fs/ext4", NULL);
3958         kset_unregister(ext4_kset);
3959 }
3960
/* Module metadata and the init/exit entry points defined above. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
module_init(init_ext4_fs)
module_exit(exit_ext4_fs)