git.oblomov.eu Git - linux-2.6/blob - fs/ext4/super.c

   1 /*
   2  *  linux/fs/ext4/super.c
   3  *
   4  * Copyright (C) 1992, 1993, 1994, 1995
   5  * Remy Card (card@masi.ibp.fr)
   6  * Laboratoire MASI - Institut Blaise Pascal
   7  * Universite Pierre et Marie Curie (Paris VI)
   8  *
   9  *  from
  10  *
  11  *  linux/fs/minix/inode.c
  12  *
  13  *  Copyright (C) 1991, 1992  Linus Torvalds
  14  *
  15  *  Big-endian to little-endian byte-swapping/bitmaps by
  16  *        David S. Miller (davem@caip.rutgers.edu), 1995
  17  */
  18
  19 #include <linux/module.h>
  20 #include <linux/string.h>
  21 #include <linux/fs.h>
  22 #include <linux/time.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/jbd2.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/blkdev.h>
  28 #include <linux/parser.h>
  29 #include <linux/smp_lock.h>
  30 #include <linux/buffer_head.h>
  31 #include <linux/exportfs.h>
  32 #include <linux/vfs.h>
  33 #include <linux/random.h>
  34 #include <linux/mount.h>
  35 #include <linux/namei.h>
  36 #include <linux/quotaops.h>
  37 #include <linux/seq_file.h>
  38 #include <linux/proc_fs.h>
  39 #include <linux/ctype.h>
  40 #include <linux/marker.h>
  41 #include <linux/log2.h>
  42 #include <linux/crc16.h>
  43 #include <asm/uaccess.h>
  44
  45 #include "ext4.h"
  46 #include "ext4_jbd2.h"
  47 #include "xattr.h"
  48 #include "acl.h"
  49
/*
 * Default number of mb_history entries.  Registered below as the module
 * parameter "default_mb_history_length" with permissions 0644.
 */
static int default_mb_history_length = 1000;

module_param_named(default_mb_history_length, default_mb_history_length,
                   int, 0644);
MODULE_PARM_DESC(default_mb_history_length,
                 "Default number of entries saved for mb_history");
  56
/* Parent proc directory; ext4_put_super() removes the per-sb entry from it. */
struct proc_dir_entry *ext4_proc_root;
/* NOTE(review): presumably the kset that per-sb kobjects belong to -- confirm. */
static struct kset *ext4_kset;
  59
/*
 * Forward declarations of file-local (static) functions that are used
 * before their definitions appear in this file.
 */
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
static int ext4_commit_super(struct super_block *sb, int sync);
static void ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
static void ext4_clear_journal_err(struct super_block *sb,
                                   struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static const char *ext4_decode_error(struct super_block *sb, int errno,
                                     char nbuf[16]);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
  75
  76
  77 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
  78                                struct ext4_group_desc *bg)
  79 {
  80         return le32_to_cpu(bg->bg_block_bitmap_lo) |
  81                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  82                  (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
  83 }
  84
  85 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
  86                                struct ext4_group_desc *bg)
  87 {
  88         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
  89                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  90                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
  91 }
  92
  93 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
  94                               struct ext4_group_desc *bg)
  95 {
  96         return le32_to_cpu(bg->bg_inode_table_lo) |
  97                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
  98                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  99 }
 100
 101 __u32 ext4_free_blks_count(struct super_block *sb,
 102                               struct ext4_group_desc *bg)
 103 {
 104         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 105                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 106                  (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 107 }
 108
 109 __u32 ext4_free_inodes_count(struct super_block *sb,
 110                               struct ext4_group_desc *bg)
 111 {
 112         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 113                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 114                  (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 115 }
 116
 117 __u32 ext4_used_dirs_count(struct super_block *sb,
 118                               struct ext4_group_desc *bg)
 119 {
 120         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 121                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 122                  (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 123 }
 124
 125 __u32 ext4_itable_unused_count(struct super_block *sb,
 126                               struct ext4_group_desc *bg)
 127 {
 128         return le16_to_cpu(bg->bg_itable_unused_lo) |
 129                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 130                  (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 131 }
 132
 133 void ext4_block_bitmap_set(struct super_block *sb,
 134                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 135 {
 136         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 137         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 138                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 139 }
 140
 141 void ext4_inode_bitmap_set(struct super_block *sb,
 142                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 143 {
 144         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 145         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 146                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 147 }
 148
 149 void ext4_inode_table_set(struct super_block *sb,
 150                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 151 {
 152         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 153         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 154                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 155 }
 156
 157 void ext4_free_blks_set(struct super_block *sb,
 158                           struct ext4_group_desc *bg, __u32 count)
 159 {
 160         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 161         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 162                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 163 }
 164
 165 void ext4_free_inodes_set(struct super_block *sb,
 166                           struct ext4_group_desc *bg, __u32 count)
 167 {
 168         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 169         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 170                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 171 }
 172
 173 void ext4_used_dirs_set(struct super_block *sb,
 174                           struct ext4_group_desc *bg, __u32 count)
 175 {
 176         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 177         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 178                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 179 }
 180
 181 void ext4_itable_unused_set(struct super_block *sb,
 182                           struct ext4_group_desc *bg, __u32 count)
 183 {
 184         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 185         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 186                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 187 }
 188
 189 /*
 190  * Wrappers for jbd2_journal_start/end.
 191  *
 192  * The only special thing we need to do here is to make sure that all
 193  * journal_end calls result in the superblock being marked dirty, so
 194  * that sync() will call the filesystem's write_super callback if
 195  * appropriate.
 196  */
 197 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 198 {
 199         journal_t *journal;
 200
 201         if (sb->s_flags & MS_RDONLY)
 202                 return ERR_PTR(-EROFS);
 203
 204         /* Special case here: if the journal has aborted behind our
 205          * backs (eg. EIO in the commit thread), then we still need to
 206          * take the FS itself readonly cleanly. */
 207         journal = EXT4_SB(sb)->s_journal;
 208         if (journal) {
 209                 if (is_journal_aborted(journal)) {
 210                         ext4_abort(sb, __func__, "Detected aborted journal");
 211                         return ERR_PTR(-EROFS);
 212                 }
 213                 return jbd2_journal_start(journal, nblocks);
 214         }
 215         /*
 216          * We're not journaling, return the appropriate indication.
 217          */
 218         current->journal_info = EXT4_NOJOURNAL_HANDLE;
 219         return current->journal_info;
 220 }
 221
 222 /*
 223  * The only special thing we need to do here is to make sure that all
 224  * jbd2_journal_stop calls result in the superblock being marked dirty, so
 225  * that sync() will call the filesystem's write_super callback if
 226  * appropriate.
 227  */
 228 int __ext4_journal_stop(const char *where, handle_t *handle)
 229 {
 230         struct super_block *sb;
 231         int err;
 232         int rc;
 233
 234         if (!ext4_handle_valid(handle)) {
 235                 /*
 236                  * Do this here since we don't call jbd2_journal_stop() in
 237                  * no-journal mode.
 238                  */
 239                 current->journal_info = NULL;
 240                 return 0;
 241         }
 242         sb = handle->h_transaction->t_journal->j_private;
 243         err = handle->h_err;
 244         rc = jbd2_journal_stop(handle);
 245
 246         if (!err)
 247                 err = rc;
 248         if (err)
 249                 __ext4_std_error(sb, where, err);
 250         return err;
 251 }
 252
 253 void ext4_journal_abort_handle(const char *caller, const char *err_fn,
 254                 struct buffer_head *bh, handle_t *handle, int err)
 255 {
 256         char nbuf[16];
 257         const char *errstr = ext4_decode_error(NULL, err, nbuf);
 258
 259         BUG_ON(!ext4_handle_valid(handle));
 260
 261         if (bh)
 262                 BUFFER_TRACE(bh, "abort");
 263
 264         if (!handle->h_err)
 265                 handle->h_err = err;
 266
 267         if (is_handle_aborted(handle))
 268                 return;
 269
 270         printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
 271                caller, errstr, err_fn);
 272
 273         jbd2_journal_abort_handle(handle);
 274 }
 275
 276 /* Deal with the reporting of failure conditions on a filesystem such as
 277  * inconsistencies detected or read IO failures.
 278  *
 279  * On ext2, we can store the error state of the filesystem in the
 280  * superblock.  That is not possible on ext4, because we may have other
 281  * write ordering constraints on the superblock which prevent us from
 282  * writing it out straight away; and given that the journal is about to
 283  * be aborted, we can't rely on the current, or future, transactions to
 284  * write out the superblock safely.
 285  *
 286  * We'll just use the jbd2_journal_abort() error code to record an error in
 287  * the journal instead.  On recovery, the journal will compain about
 288  * that error until we've noted it down and cleared it.
 289  */
 290
 291 static void ext4_handle_error(struct super_block *sb)
 292 {
 293         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 294
 295         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 296         es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 297
 298         if (sb->s_flags & MS_RDONLY)
 299                 return;
 300
 301         if (!test_opt(sb, ERRORS_CONT)) {
 302                 journal_t *journal = EXT4_SB(sb)->s_journal;
 303
 304                 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 305                 if (journal)
 306                         jbd2_journal_abort(journal, -EIO);
 307         }
 308         if (test_opt(sb, ERRORS_RO)) {
 309                 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 310                 sb->s_flags |= MS_RDONLY;
 311         }
 312         ext4_commit_super(sb, 1);
 313         if (test_opt(sb, ERRORS_PANIC))
 314                 panic("EXT4-fs (device %s): panic forced after error\n",
 315                         sb->s_id);
 316 }
 317
 318 void ext4_error(struct super_block *sb, const char *function,
 319                 const char *fmt, ...)
 320 {
 321         va_list args;
 322
 323         va_start(args, fmt);
 324         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 325         vprintk(fmt, args);
 326         printk("\n");
 327         va_end(args);
 328
 329         ext4_handle_error(sb);
 330 }
 331
 332 static const char *ext4_decode_error(struct super_block *sb, int errno,
 333                                      char nbuf[16])
 334 {
 335         char *errstr = NULL;
 336
 337         switch (errno) {
 338         case -EIO:
 339                 errstr = "IO failure";
 340                 break;
 341         case -ENOMEM:
 342                 errstr = "Out of memory";
 343                 break;
 344         case -EROFS:
 345                 if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
 346                         errstr = "Journal has aborted";
 347                 else
 348                         errstr = "Readonly filesystem";
 349                 break;
 350         default:
 351                 /* If the caller passed in an extra buffer for unknown
 352                  * errors, textualise them now.  Else we just return
 353                  * NULL. */
 354                 if (nbuf) {
 355                         /* Check for truncated error codes... */
 356                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 357                                 errstr = nbuf;
 358                 }
 359                 break;
 360         }
 361
 362         return errstr;
 363 }
 364
 365 /* __ext4_std_error decodes expected errors from journaling functions
 366  * automatically and invokes the appropriate error response.  */
 367
 368 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
 369 {
 370         char nbuf[16];
 371         const char *errstr;
 372
 373         /* Special case: if the error is EROFS, and we're not already
 374          * inside a transaction, then there's really no point in logging
 375          * an error. */
 376         if (errno == -EROFS && journal_current_handle() == NULL &&
 377             (sb->s_flags & MS_RDONLY))
 378                 return;
 379
 380         errstr = ext4_decode_error(sb, errno, nbuf);
 381         printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
 382                sb->s_id, function, errstr);
 383
 384         ext4_handle_error(sb);
 385 }
 386
 387 /*
 388  * ext4_abort is a much stronger failure handler than ext4_error.  The
 389  * abort function may be used to deal with unrecoverable failures such
 390  * as journal IO errors or ENOMEM at a critical moment in log management.
 391  *
 392  * We unconditionally force the filesystem into an ABORT|READONLY state,
 393  * unless the error response on the fs has been set to panic in which
 394  * case we take the easy way out and panic immediately.
 395  */
 396
 397 void ext4_abort(struct super_block *sb, const char *function,
 398                 const char *fmt, ...)
 399 {
 400         va_list args;
 401
 402         va_start(args, fmt);
 403         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 404         vprintk(fmt, args);
 405         printk("\n");
 406         va_end(args);
 407
 408         if (test_opt(sb, ERRORS_PANIC))
 409                 panic("EXT4-fs panic from previous error\n");
 410
 411         if (sb->s_flags & MS_RDONLY)
 412                 return;
 413
 414         ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 415         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 416         sb->s_flags |= MS_RDONLY;
 417         EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
 418         if (EXT4_SB(sb)->s_journal)
 419                 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 420 }
 421
 422 void ext4_msg (struct super_block * sb, const char *prefix,
 423                    const char *fmt, ...)
 424 {
 425         va_list args;
 426
 427         va_start(args, fmt);
 428         printk("%sEXT4-fs (%s): ", prefix, sb->s_id);
 429         vprintk(fmt, args);
 430         printk("\n");
 431         va_end(args);
 432 }
 433
 434 void ext4_warning(struct super_block *sb, const char *function,
 435                   const char *fmt, ...)
 436 {
 437         va_list args;
 438
 439         va_start(args, fmt);
 440         printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
 441                sb->s_id, function);
 442         vprintk(fmt, args);
 443         printk("\n");
 444         va_end(args);
 445 }
 446
 447 void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
 448                            const char *function, const char *fmt, ...)
 449 __releases(bitlock)
 450 __acquires(bitlock)
 451 {
 452         va_list args;
 453         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 454
 455         va_start(args, fmt);
 456         printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
 457         vprintk(fmt, args);
 458         printk("\n");
 459         va_end(args);
 460
 461         if (test_opt(sb, ERRORS_CONT)) {
 462                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 463                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
 464                 ext4_commit_super(sb, 0);
 465                 return;
 466         }
 467         ext4_unlock_group(sb, grp);
 468         ext4_handle_error(sb);
 469         /*
 470          * We only get here in the ERRORS_RO case; relocking the group
 471          * may be dangerous, but nothing bad will happen since the
 472          * filesystem will have already been marked read/only and the
 473          * journal has been aborted.  We return 1 as a hint to callers
 474          * who might what to use the return value from
 475          * ext4_grp_locked_error() to distinguish beween the
 476          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
 477          * aggressively from the ext4 function in question, with a
 478          * more appropriate error code.
 479          */
 480         ext4_lock_group(sb, grp);
 481         return;
 482 }
 483
 484 void ext4_update_dynamic_rev(struct super_block *sb)
 485 {
 486         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 487
 488         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 489                 return;
 490
 491         ext4_warning(sb, __func__,
 492                      "updating to rev %d because of new feature flag, "
 493                      "running e2fsck is recommended",
 494                      EXT4_DYNAMIC_REV);
 495
 496         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
 497         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
 498         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
 499         /* leave es->s_feature_*compat flags alone */
 500         /* es->s_uuid will be set by e2fsck if empty */
 501
 502         /*
 503          * The rest of the superblock fields should be zero, and if not it
 504          * means they are likely already in use, so leave them alone.  We
 505          * can leave it up to e2fsck to clean up any inconsistencies there.
 506          */
 507 }
 508
 509 /*
 510  * Open the external journal device
 511  */
 512 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 513 {
 514         struct block_device *bdev;
 515         char b[BDEVNAME_SIZE];
 516
 517         bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 518         if (IS_ERR(bdev))
 519                 goto fail;
 520         return bdev;
 521
 522 fail:
 523         ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
 524                         __bdevname(dev, b), PTR_ERR(bdev));
 525         return NULL;
 526 }
 527
 528 /*
 529  * Release the journal device
 530  */
 531 static int ext4_blkdev_put(struct block_device *bdev)
 532 {
 533         bd_release(bdev);
 534         return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
 535 }
 536
 537 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
 538 {
 539         struct block_device *bdev;
 540         int ret = -ENODEV;
 541
 542         bdev = sbi->journal_bdev;
 543         if (bdev) {
 544                 ret = ext4_blkdev_put(bdev);
 545                 sbi->journal_bdev = NULL;
 546         }
 547         return ret;
 548 }
 549
 550 static inline struct inode *orphan_list_entry(struct list_head *l)
 551 {
 552         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
 553 }
 554
 555 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
 556 {
 557         struct list_head *l;
 558
 559         ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
 560                  le32_to_cpu(sbi->s_es->s_last_orphan));
 561
 562         printk(KERN_ERR "sb_info orphan list:\n");
 563         list_for_each(l, &sbi->s_orphan) {
 564                 struct inode *inode = orphan_list_entry(l);
 565                 printk(KERN_ERR "  "
 566                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
 567                        inode->i_sb->s_id, inode->i_ino, inode,
 568                        inode->i_mode, inode->i_nlink,
 569                        NEXT_ORPHAN(inode));
 570         }
 571 }
 572
/*
 * Tear down the ext4-private state attached to @sb.
 *
 * In order: releases the system zone, mballoc, extent and xattr state;
 * destroys the journal (aborting the fs if that fails); on a read-write
 * mount clears the RECOVER feature, writes the mount state back to the
 * superblock and commits it; removes the proc entry and kobject; frees
 * the group descriptor buffers, flex group array, per-cpu counters,
 * superblock buffer and quota file names; invalidates the block
 * device(s); and finally frees the ext4_sb_info itself.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int i, err;

        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);
        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                /* Cleared before ext4_abort() so it does not touch the journal. */
                sbi->s_journal = NULL;
                if (err < 0)
                        ext4_abort(sb, __func__,
                                   "Couldn't clean up the journal");
        }
        /* Only a read-write mount gets its superblock state written back. */
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
                ext4_commit_super(sb, 1);
        }
        if (sbi->s_proc) {
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
        kobject_del(&sbi->s_kobj);

        /* Drop the buffer heads holding the group descriptors. */
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(sbi->s_group_desc[i]);
        kfree(sbi->s_group_desc);
        /* s_flex_groups may be either vmalloc'ed or kmalloc'ed. */
        if (is_vmalloc_addr(sbi->s_flex_groups))
                vfree(sbi->s_flex_groups);
        else
                kfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
        brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                kfree(sbi->s_qf_names[i]);
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));

        invalidate_bdev(sb->s_bdev);
        /* An external journal device needs its own flush and release. */
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->journal_bdev);
                invalidate_bdev(sbi->journal_bdev);
                ext4_blkdev_remove(sbi);
        }
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         * The kernel lock and the superblock lock are dropped while
         * waiting for the kobject release to complete, and retaken
         * afterwards in the reverse order.
         */
        unlock_kernel();
        unlock_super(sb);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
        lock_super(sb);
        lock_kernel();
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
}
 650
/* Slab cache for struct ext4_inode_info; created by init_inodecache(). */
static struct kmem_cache *ext4_inode_cachep;
 652
 653 /*
 654  * Called inside transaction, so use GFP_NOFS
 655  */
 656 static struct inode *ext4_alloc_inode(struct super_block *sb)
 657 {
 658         struct ext4_inode_info *ei;
 659
 660         ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 661         if (!ei)
 662                 return NULL;
 663
 664 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 665         ei->i_acl = EXT4_ACL_NOT_CACHED;
 666         ei->i_default_acl = EXT4_ACL_NOT_CACHED;
 667 #endif
 668         ei->vfs_inode.i_version = 1;
 669         ei->vfs_inode.i_data.writeback_index = 0;
 670         memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
 671         INIT_LIST_HEAD(&ei->i_prealloc_list);
 672         spin_lock_init(&ei->i_prealloc_lock);
 673         /*
 674          * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
 675          * therefore it can be null here.  Don't check it, just initialize
 676          * jinode.
 677          */
 678         jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
 679         ei->i_reserved_data_blocks = 0;
 680         ei->i_reserved_meta_blocks = 0;
 681         ei->i_allocated_meta_blocks = 0;
 682         ei->i_delalloc_reserved_flag = 0;
 683         spin_lock_init(&(ei->i_block_reservation_lock));
 684
 685         return &ei->vfs_inode;
 686 }
 687
 688 static void ext4_destroy_inode(struct inode *inode)
 689 {
 690         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 691                 ext4_msg(inode->i_sb, KERN_ERR,
 692                          "Inode %lu (%p): orphan list check failed!",
 693                          inode->i_ino, EXT4_I(inode));
 694                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
 695                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
 696                                 true);
 697                 dump_stack();
 698         }
 699         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 700 }
 701
 702 static void init_once(void *foo)
 703 {
 704         struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 705
 706         INIT_LIST_HEAD(&ei->i_orphan);
 707 #ifdef CONFIG_EXT4_FS_XATTR
 708         init_rwsem(&ei->xattr_sem);
 709 #endif
 710         init_rwsem(&ei->i_data_sem);
 711         inode_init_once(&ei->vfs_inode);
 712 }
 713
 714 static int init_inodecache(void)
 715 {
 716         ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
 717                                              sizeof(struct ext4_inode_info),
 718                                              0, (SLAB_RECLAIM_ACCOUNT|
 719                                                 SLAB_MEM_SPREAD),
 720                                              init_once);
 721         if (ext4_inode_cachep == NULL)
 722                 return -ENOMEM;
 723         return 0;
 724 }
 725
/* Destroy the slab cache created by init_inodecache(). */
static void destroy_inodecache(void)
{
        kmem_cache_destroy(ext4_inode_cachep);
}
 730
 731 static void ext4_clear_inode(struct inode *inode)
 732 {
 733 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 734         if (EXT4_I(inode)->i_acl &&
 735                         EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
 736                 posix_acl_release(EXT4_I(inode)->i_acl);
 737                 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
 738         }
 739         if (EXT4_I(inode)->i_default_acl &&
 740                         EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
 741                 posix_acl_release(EXT4_I(inode)->i_default_acl);
 742                 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
 743         }
 744 #endif
 745         ext4_discard_preallocations(inode);
 746         if (EXT4_JOURNAL(inode))
 747                 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
 748                                        &EXT4_I(inode)->jinode);
 749 }
 750
 751 static inline void ext4_show_quota_options(struct seq_file *seq,
 752                                            struct super_block *sb)
 753 {
 754 #if defined(CONFIG_QUOTA)
 755         struct ext4_sb_info *sbi = EXT4_SB(sb);
 756
 757         if (sbi->s_jquota_fmt)
 758                 seq_printf(seq, ",jqfmt=%s",
 759                 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
 760
 761         if (sbi->s_qf_names[USRQUOTA])
 762                 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
 763
 764         if (sbi->s_qf_names[GRPQUOTA])
 765                 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 766
 767         if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
 768                 seq_puts(seq, ",usrquota");
 769
 770         if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
 771                 seq_puts(seq, ",grpquota");
 772 #endif
 773 }
 774
 775 /*
 776  * Show an option if
 777  *  - it's set to a non-default value OR
 778  *  - if the per-sb default is different from the global default
 779  */
 780 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 781 {
 782         int def_errors;
 783         unsigned long def_mount_opts;
 784         struct super_block *sb = vfs->mnt_sb;
 785         struct ext4_sb_info *sbi = EXT4_SB(sb);
 786         struct ext4_super_block *es = sbi->s_es;
 787
 788         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 789         def_errors     = le16_to_cpu(es->s_errors);
 790
 791         if (sbi->s_sb_block != 1)
 792                 seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
 793         if (test_opt(sb, MINIX_DF))
 794                 seq_puts(seq, ",minixdf");
 795         if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
 796                 seq_puts(seq, ",grpid");
 797         if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
 798                 seq_puts(seq, ",nogrpid");
 799         if (sbi->s_resuid != EXT4_DEF_RESUID ||
 800             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
 801                 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
 802         }
 803         if (sbi->s_resgid != EXT4_DEF_RESGID ||
 804             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
 805                 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
 806         }
 807         if (test_opt(sb, ERRORS_RO)) {
 808                 if (def_errors == EXT4_ERRORS_PANIC ||
 809                     def_errors == EXT4_ERRORS_CONTINUE) {
 810                         seq_puts(seq, ",errors=remount-ro");
 811                 }
 812         }
 813         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
 814                 seq_puts(seq, ",errors=continue");
 815         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
 816                 seq_puts(seq, ",errors=panic");
 817         if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
 818                 seq_puts(seq, ",nouid32");
 819         if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
 820                 seq_puts(seq, ",debug");
 821         if (test_opt(sb, OLDALLOC))
 822                 seq_puts(seq, ",oldalloc");
 823 #ifdef CONFIG_EXT4_FS_XATTR
 824         if (test_opt(sb, XATTR_USER) &&
 825                 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
 826                 seq_puts(seq, ",user_xattr");
 827         if (!test_opt(sb, XATTR_USER) &&
 828             (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
 829                 seq_puts(seq, ",nouser_xattr");
 830         }
 831 #endif
 832 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 833         if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
 834                 seq_puts(seq, ",acl");
 835         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
 836                 seq_puts(seq, ",noacl");
 837 #endif
 838         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
 839                 seq_printf(seq, ",commit=%u",
 840                            (unsigned) (sbi->s_commit_interval / HZ));
 841         }
 842         if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
 843                 seq_printf(seq, ",min_batch_time=%u",
 844                            (unsigned) sbi->s_min_batch_time);
 845         }
 846         if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
 847                 seq_printf(seq, ",max_batch_time=%u",
 848                            (unsigned) sbi->s_min_batch_time);
 849         }
 850
 851         /*
 852          * We're changing the default of barrier mount option, so
 853          * let's always display its mount state so it's clear what its
 854          * status is.
 855          */
 856         seq_puts(seq, ",barrier=");
 857         seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
 858         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
 859                 seq_puts(seq, ",journal_async_commit");
 860         if (test_opt(sb, NOBH))
 861                 seq_puts(seq, ",nobh");
 862         if (test_opt(sb, I_VERSION))
 863                 seq_puts(seq, ",i_version");
 864         if (!test_opt(sb, DELALLOC))
 865                 seq_puts(seq, ",nodelalloc");
 866
 867
 868         if (sbi->s_stripe)
 869                 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 870         /*
 871          * journal mode get enabled in different ways
 872          * So just print the value even if we didn't specify it
 873          */
 874         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 875                 seq_puts(seq, ",data=journal");
 876         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
 877                 seq_puts(seq, ",data=ordered");
 878         else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
 879                 seq_puts(seq, ",data=writeback");
 880
 881         if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
 882                 seq_printf(seq, ",inode_readahead_blks=%u",
 883                            sbi->s_inode_readahead_blks);
 884
 885         if (test_opt(sb, DATA_ERR_ABORT))
 886                 seq_puts(seq, ",data_err=abort");
 887
 888         if (test_opt(sb, NO_AUTO_DA_ALLOC))
 889                 seq_puts(seq, ",noauto_da_alloc");
 890
 891         ext4_show_quota_options(seq, sb);
 892
 893         return 0;
 894 }
 895
 896 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 897                                         u64 ino, u32 generation)
 898 {
 899         struct inode *inode;
 900
 901         if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
 902                 return ERR_PTR(-ESTALE);
 903         if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 904                 return ERR_PTR(-ESTALE);
 905
 906         /* iget isn't really right if the inode is currently unallocated!!
 907          *
 908          * ext4_read_inode will return a bad_inode if the inode had been
 909          * deleted, so we should be safe.
 910          *
 911          * Currently we don't know the generation for parent directory, so
 912          * a generation of 0 means "accept any"
 913          */
 914         inode = ext4_iget(sb, ino);
 915         if (IS_ERR(inode))
 916                 return ERR_CAST(inode);
 917         if (generation && inode->i_generation != generation) {
 918                 iput(inode);
 919                 return ERR_PTR(-ESTALE);
 920         }
 921
 922         return inode;
 923 }
 924
 925 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
 926                                         int fh_len, int fh_type)
 927 {
 928         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
 929                                     ext4_nfs_get_inode);
 930 }
 931
 932 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
 933                                         int fh_len, int fh_type)
 934 {
 935         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
 936                                     ext4_nfs_get_inode);
 937 }
 938
 939 /*
 940  * Try to release metadata pages (indirect blocks, directories) which are
 941  * mapped via the block device.  Since these pages could have journal heads
 942  * which would prevent try_to_free_buffers() from freeing them, we must use
 943  * jbd2 layer's try_to_free_buffers() function to release them.
 944  */
 945 static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 946                                  gfp_t wait)
 947 {
 948         journal_t *journal = EXT4_SB(sb)->s_journal;
 949
 950         WARN_ON(PageChecked(page));
 951         if (!page_has_buffers(page))
 952                 return 0;
 953         if (journal)
 954                 return jbd2_journal_try_to_free_buffers(journal, page,
 955                                                         wait & ~__GFP_WAIT);
 956         return try_to_free_buffers(page);
 957 }
 958
 959 #ifdef CONFIG_QUOTA
 960 #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
     /*
      * QTYPE2NAME: printable name of a quota type for messages; any value
      * other than USRQUOTA yields "group".
      *
      * QTYPE2MOPT: apparently meant to select <on>USRJQUOTA or <on>GRPJQUOTA
      * by quota type.
      * NOTE(review): as written, ## pastes the closing ')' of "(on)" onto the
      * identifier, which does not form a valid token; no expansion of this
      * macro is visible in this part of the file -- confirm before using it.
      */
 961 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
 962
     /*
      * Forward declarations of the ext4 quota callbacks referenced by the
      * operation tables that follow (their definitions are not in this part
      * of the file).
      */
 963 static int ext4_write_dquot(struct dquot *dquot);
 964 static int ext4_acquire_dquot(struct dquot *dquot);
 965 static int ext4_release_dquot(struct dquot *dquot);
 966 static int ext4_mark_dquot_dirty(struct dquot *dquot);
 967 static int ext4_write_info(struct super_block *sb, int type);
 968 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 969                                 char *path, int remount);
 970 static int ext4_quota_on_mount(struct super_block *sb, int type);
 971 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 972                                size_t len, loff_t off);
 973 static ssize_t ext4_quota_write(struct super_block *sb, int type,
 974                                 const char *data, size_t len, loff_t off);
 975
 976 static struct dquot_operations ext4_quota_operations = {
 977         .initialize     = dquot_initialize,
 978         .drop           = dquot_drop,
 979         .alloc_space    = dquot_alloc_space,
 980         .reserve_space  = dquot_reserve_space,
 981         .claim_space    = dquot_claim_space,
 982         .release_rsv    = dquot_release_reserved_space,
 983         .get_reserved_space = ext4_get_reserved_space,
 984         .alloc_inode    = dquot_alloc_inode,
 985         .free_space     = dquot_free_space,
 986         .free_inode     = dquot_free_inode,
 987         .transfer       = dquot_transfer,
 988         .write_dquot    = ext4_write_dquot,
 989         .acquire_dquot  = ext4_acquire_dquot,
 990         .release_dquot  = ext4_release_dquot,
 991         .mark_dirty     = ext4_mark_dquot_dirty,
 992         .write_info     = ext4_write_info,
 993         .alloc_dquot    = dquot_alloc,
 994         .destroy_dquot  = dquot_destroy,
 995 };
 996
 997 static struct quotactl_ops ext4_qctl_operations = {
 998         .quota_on       = ext4_quota_on,
 999         .quota_off      = vfs_quota_off,
1000         .quota_sync     = vfs_quota_sync,
1001         .get_info       = vfs_get_dqinfo,
1002         .set_info       = vfs_set_dqinfo,
1003         .get_dqblk      = vfs_get_dqblk,
1004         .set_dqblk      = vfs_set_dqblk
1005 };
1006 #endif
1007
1008 static const struct super_operations ext4_sops = {
1009         .alloc_inode    = ext4_alloc_inode,
1010         .destroy_inode  = ext4_destroy_inode,
1011         .write_inode    = ext4_write_inode,
1012         .dirty_inode    = ext4_dirty_inode,
1013         .delete_inode   = ext4_delete_inode,
1014         .put_super      = ext4_put_super,
1015         .sync_fs        = ext4_sync_fs,
1016         .freeze_fs      = ext4_freeze,
1017         .unfreeze_fs    = ext4_unfreeze,
1018         .statfs         = ext4_statfs,
1019         .remount_fs     = ext4_remount,
1020         .clear_inode    = ext4_clear_inode,
1021         .show_options   = ext4_show_options,
1022 #ifdef CONFIG_QUOTA
1023         .quota_read     = ext4_quota_read,
1024         .quota_write    = ext4_quota_write,
1025 #endif
1026         .bdev_try_to_free_page = bdev_try_to_free_page,
1027 };
1028
1029 static const struct super_operations ext4_nojournal_sops = {
1030         .alloc_inode    = ext4_alloc_inode,
1031         .destroy_inode  = ext4_destroy_inode,
1032         .write_inode    = ext4_write_inode,
1033         .dirty_inode    = ext4_dirty_inode,
1034         .delete_inode   = ext4_delete_inode,
1035         .write_super    = ext4_write_super,
1036         .put_super      = ext4_put_super,
1037         .statfs         = ext4_statfs,
1038         .remount_fs     = ext4_remount,
1039         .clear_inode    = ext4_clear_inode,
1040         .show_options   = ext4_show_options,
1041 #ifdef CONFIG_QUOTA
1042         .quota_read     = ext4_quota_read,
1043         .quota_write    = ext4_quota_write,
1044 #endif
1045         .bdev_try_to_free_page = bdev_try_to_free_page,
1046 };
1047
1048 static const struct export_operations ext4_export_ops = {
1049         .fh_to_dentry = ext4_fh_to_dentry,
1050         .fh_to_parent = ext4_fh_to_parent,
1051         .get_parent = ext4_get_parent,
1052 };
1053
/*
 * Token identifiers for mount options; each is paired with its textual
 * pattern in the tokens[] table and dispatched on in parse_options().
 * Enumerators keep their original order, so their values are unchanged.
 */
enum {
        /* statfs flavour and group-id inheritance */
        Opt_bsd_df,
        Opt_minix_df,
        Opt_grpid,
        Opt_nogrpid,

        /* reserved-block owner and superblock location */
        Opt_resgid,
        Opt_resuid,
        Opt_sb,

        /* error behaviour */
        Opt_err_cont,
        Opt_err_panic,
        Opt_err_ro,

        Opt_nouid32,
        Opt_debug,

        /* allocator selection */
        Opt_oldalloc,
        Opt_orlov,

        /* extended attributes and ACLs */
        Opt_user_xattr,
        Opt_nouser_xattr,
        Opt_acl,
        Opt_noacl,

        Opt_auto_da_alloc,
        Opt_noauto_da_alloc,
        Opt_noload,
        Opt_nobh,
        Opt_bh,

        /* journal tuning */
        Opt_commit,
        Opt_min_batch_time,
        Opt_max_batch_time,
        Opt_journal_update,
        Opt_journal_dev,
        Opt_journal_checksum,
        Opt_journal_async_commit,

        Opt_abort,

        /* data journalling mode and data error policy */
        Opt_data_journal,
        Opt_data_ordered,
        Opt_data_writeback,
        Opt_data_err_abort,
        Opt_data_err_ignore,

        Opt_mb_history_length,

        /* quota */
        Opt_usrjquota,
        Opt_grpjquota,
        Opt_offusrjquota,
        Opt_offgrpjquota,
        Opt_jqfmt_vfsold,
        Opt_jqfmt_vfsv0,
        Opt_quota,
        Opt_noquota,

        Opt_ignore,
        Opt_barrier,
        Opt_nobarrier,
        Opt_err,        /* paired with the NULL pattern that ends tokens[] */
        Opt_resize,

        Opt_usrquota,
        Opt_grpquota,
        Opt_i_version,

        Opt_stripe,
        Opt_delalloc,
        Opt_nodelalloc,

        Opt_block_validity,
        Opt_noblock_validity,

        Opt_inode_readahead_blks,
        Opt_journal_ioprio
};
1073
1074 static const match_table_t tokens = {
             /*
              * Pattern table handed to match_token() by parse_options():
              * each entry pairs an Opt_* token with the option text it
              * recognises.  Several spellings may map to the same token
              * (e.g. "grpid"/"bsdgroups", "barrier=%u"/"barrier").
              * NOTE(review): entry order looks significant where one pattern
              * is a prefix of another (e.g. "usrjquota=" is listed before
              * "usrjquota=%s") -- presumably match_token() returns the first
              * matching entry; confirm before reordering.
              */
1075         {Opt_bsd_df, "bsddf"},
1076         {Opt_minix_df, "minixdf"},
1077         {Opt_grpid, "grpid"},
1078         {Opt_grpid, "bsdgroups"},
1079         {Opt_nogrpid, "nogrpid"},
1080         {Opt_nogrpid, "sysvgroups"},
1081         {Opt_resgid, "resgid=%u"},
1082         {Opt_resuid, "resuid=%u"},
1083         {Opt_sb, "sb=%u"},
1084         {Opt_err_cont, "errors=continue"},
1085         {Opt_err_panic, "errors=panic"},
1086         {Opt_err_ro, "errors=remount-ro"},
1087         {Opt_nouid32, "nouid32"},
1088         {Opt_debug, "debug"},
1089         {Opt_oldalloc, "oldalloc"},
1090         {Opt_orlov, "orlov"},
1091         {Opt_user_xattr, "user_xattr"},
1092         {Opt_nouser_xattr, "nouser_xattr"},
1093         {Opt_acl, "acl"},
1094         {Opt_noacl, "noacl"},
1095         {Opt_noload, "noload"},
1096         {Opt_nobh, "nobh"},
1097         {Opt_bh, "bh"},
1098         {Opt_commit, "commit=%u"},
1099         {Opt_min_batch_time, "min_batch_time=%u"},
1100         {Opt_max_batch_time, "max_batch_time=%u"},
1101         {Opt_journal_update, "journal=update"},
1102         {Opt_journal_dev, "journal_dev=%u"},
1103         {Opt_journal_checksum, "journal_checksum"},
1104         {Opt_journal_async_commit, "journal_async_commit"},
1105         {Opt_abort, "abort"},
1106         {Opt_data_journal, "data=journal"},
1107         {Opt_data_ordered, "data=ordered"},
1108         {Opt_data_writeback, "data=writeback"},
1109         {Opt_data_err_abort, "data_err=abort"},
1110         {Opt_data_err_ignore, "data_err=ignore"},
1111         {Opt_mb_history_length, "mb_history_length=%u"},
             /* An empty journaled-quota file name maps to the "off" tokens. */
1112         {Opt_offusrjquota, "usrjquota="},
1113         {Opt_usrjquota, "usrjquota=%s"},
1114         {Opt_offgrpjquota, "grpjquota="},
1115         {Opt_grpjquota, "grpjquota=%s"},
1116         {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1117         {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1118         {Opt_grpquota, "grpquota"},
1119         {Opt_noquota, "noquota"},
1120         {Opt_quota, "quota"},
1121         {Opt_usrquota, "usrquota"},
             /* "barrier" may be given with or without a numeric argument. */
1122         {Opt_barrier, "barrier=%u"},
1123         {Opt_barrier, "barrier"},
1124         {Opt_nobarrier, "nobarrier"},
1125         {Opt_i_version, "i_version"},
1126         {Opt_stripe, "stripe=%u"},
1127         {Opt_resize, "resize"},
1128         {Opt_delalloc, "delalloc"},
1129         {Opt_nodelalloc, "nodelalloc"},
1130         {Opt_block_validity, "block_validity"},
1131         {Opt_noblock_validity, "noblock_validity"},
1132         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1133         {Opt_journal_ioprio, "journal_ioprio=%u"},
1134         {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1135         {Opt_auto_da_alloc, "auto_da_alloc"},
1136         {Opt_noauto_da_alloc, "noauto_da_alloc"},
             /* Terminator: a NULL pattern paired with Opt_err. */
1137         {Opt_err, NULL},
1138 };
1139
1140 static ext4_fsblk_t get_sb_block(void **data)
1141 {
1142         ext4_fsblk_t    sb_block;
1143         char            *options = (char *) *data;
1144
1145         if (!options || strncmp(options, "sb=", 3) != 0)
1146                 return 1;       /* Default location */
1147
1148         options += 3;
1149         /* TODO: use simple_strtoll with >32bit ext4 */
1150         sb_block = simple_strtoul(options, &options, 0);
1151         if (*options && *options != ',') {
1152                 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1153                        (char *) *data);
1154                 return 1;
1155         }
1156         if (*options == ',')
1157                 options++;
1158         *data = (void *) options;
1159
1160         return sb_block;
1161 }
1162
1163 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
     /*
      * I/O priority value built from class IOPRIO_CLASS_BE and level 3.
      * NOTE(review): presumably the value used when no journal_ioprio=
      * option is given -- confirm at the point of use.
      */
1164
1165 static int parse_options(char *options, struct super_block *sb,
1166                          unsigned long *journal_devnum,
1167                          unsigned int *journal_ioprio,
1168                          ext4_fsblk_t *n_blocks_count, int is_remount)
1169 {
1170         struct ext4_sb_info *sbi = EXT4_SB(sb);
1171         char *p;
1172         substring_t args[MAX_OPT_ARGS];
1173         int data_opt = 0;
1174         int option;
1175 #ifdef CONFIG_QUOTA
1176         int qtype, qfmt;
1177         char *qname;
1178 #endif
1179
1180         if (!options)
1181                 return 1;
1182
1183         while ((p = strsep(&options, ",")) != NULL) {
1184                 int token;
1185                 if (!*p)
1186                         continue;
1187
1188                 token = match_token(p, tokens, args);
1189                 switch (token) {
1190                 case Opt_bsd_df:
1191                         clear_opt(sbi->s_mount_opt, MINIX_DF);
1192                         break;
1193                 case Opt_minix_df:
1194                         set_opt(sbi->s_mount_opt, MINIX_DF);
1195                         break;
1196                 case Opt_grpid:
1197                         set_opt(sbi->s_mount_opt, GRPID);
1198                         break;
1199                 case Opt_nogrpid:
1200                         clear_opt(sbi->s_mount_opt, GRPID);
1201                         break;
1202                 case Opt_resuid:
1203                         if (match_int(&args[0], &option))
1204                                 return 0;
1205                         sbi->s_resuid = option;
1206                         break;
1207                 case Opt_resgid:
1208                         if (match_int(&args[0], &option))
1209                                 return 0;
1210                         sbi->s_resgid = option;
1211                         break;
1212                 case Opt_sb:
1213                         /* handled by get_sb_block() instead of here */
1214                         /* *sb_block = match_int(&args[0]); */
1215                         break;
1216                 case Opt_err_panic:
1217                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1218                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1219                         set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1220                         break;
1221                 case Opt_err_ro:
1222                         clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1223                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1224                         set_opt(sbi->s_mount_opt, ERRORS_RO);
1225                         break;
1226                 case Opt_err_cont:
1227                         clear_opt(sbi->s_mount_opt, ERRORS_RO);
1228                         clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1229                         set_opt(sbi->s_mount_opt, ERRORS_CONT);
1230                         break;
1231                 case Opt_nouid32:
1232                         set_opt(sbi->s_mount_opt, NO_UID32);
1233                         break;
1234                 case Opt_debug:
1235                         set_opt(sbi->s_mount_opt, DEBUG);
1236                         break;
1237                 case Opt_oldalloc:
1238                         set_opt(sbi->s_mount_opt, OLDALLOC);
1239                         break;
1240                 case Opt_orlov:
1241                         clear_opt(sbi->s_mount_opt, OLDALLOC);
1242                         break;
1243 #ifdef CONFIG_EXT4_FS_XATTR
1244                 case Opt_user_xattr:
1245                         set_opt(sbi->s_mount_opt, XATTR_USER);
1246                         break;
1247                 case Opt_nouser_xattr:
1248                         clear_opt(sbi->s_mount_opt, XATTR_USER);
1249                         break;
1250 #else
1251                 case Opt_user_xattr:
1252                 case Opt_nouser_xattr:
1253                         ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
1254                         break;
1255 #endif
1256 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1257                 case Opt_acl:
1258                         set_opt(sbi->s_mount_opt, POSIX_ACL);
1259                         break;
1260                 case Opt_noacl:
1261                         clear_opt(sbi->s_mount_opt, POSIX_ACL);
1262                         break;
1263 #else
1264                 case Opt_acl:
1265                 case Opt_noacl:
1266                         ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
1267                         break;
1268 #endif
1269                 case Opt_journal_update:
1270                         /* @@@ FIXME */
1271                         /* Eventually we will want to be able to create
1272                            a journal file here.  For now, only allow the
1273                            user to specify an existing inode to be the
1274                            journal file. */
1275                         if (is_remount) {
1276                                 ext4_msg(sb, KERN_ERR,
1277                                          "Cannot specify journal on remount");
1278                                 return 0;
1279                         }
1280                         set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1281                         break;
1282                 case Opt_journal_dev:
1283                         if (is_remount) {
1284                                 ext4_msg(sb, KERN_ERR,
1285                                         "Cannot specify journal on remount");
1286                                 return 0;
1287                         }
1288                         if (match_int(&args[0], &option))
1289                                 return 0;
1290                         *journal_devnum = option;
1291                         break;
1292                 case Opt_journal_checksum:
1293                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1294                         break;
1295                 case Opt_journal_async_commit:
1296                         set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1297                         set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1298                         break;
1299                 case Opt_noload:
1300                         set_opt(sbi->s_mount_opt, NOLOAD);
1301                         break;
1302                 case Opt_commit:
1303                         if (match_int(&args[0], &option))
1304                                 return 0;
1305                         if (option < 0)
1306                                 return 0;
1307                         if (option == 0)
1308                                 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
1309                         sbi->s_commit_interval = HZ * option;
1310                         break;
1311                 case Opt_max_batch_time:
1312                         if (match_int(&args[0], &option))
1313                                 return 0;
1314                         if (option < 0)
1315                                 return 0;
1316                         if (option == 0)
1317                                 option = EXT4_DEF_MAX_BATCH_TIME;
1318                         sbi->s_max_batch_time = option;
1319                         break;
1320                 case Opt_min_batch_time:
1321                         if (match_int(&args[0], &option))
1322                                 return 0;
1323                         if (option < 0)
1324                                 return 0;
1325                         sbi->s_min_batch_time = option;
1326                         break;
1327                 case Opt_data_journal:
1328                         data_opt = EXT4_MOUNT_JOURNAL_DATA;
1329                         goto datacheck;
1330                 case Opt_data_ordered:
1331                         data_opt = EXT4_MOUNT_ORDERED_DATA;
1332                         goto datacheck;
1333                 case Opt_data_writeback:
1334                         data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1335                 datacheck:
1336                         if (is_remount) {
1337                                 if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
1338                                                 != data_opt) {
1339                                         ext4_msg(sb, KERN_ERR,
1340                                                 "Cannot change data mode on remount");
1341                                         return 0;
1342                                 }
1343                         } else {
1344                                 sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
1345                                 sbi->s_mount_opt |= data_opt;
1346                         }
1347                         break;
1348                 case Opt_data_err_abort:
1349                         set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1350                         break;
1351                 case Opt_data_err_ignore:
1352                         clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1353                         break;
1354                 case Opt_mb_history_length:
1355                         if (match_int(&args[0], &option))
1356                                 return 0;
1357                         if (option < 0)
1358                                 return 0;
1359                         sbi->s_mb_history_max = option;
1360                         break;
1361 #ifdef CONFIG_QUOTA
1362                 case Opt_usrjquota:
1363                         qtype = USRQUOTA;
1364                         goto set_qf_name;
1365                 case Opt_grpjquota:
1366                         qtype = GRPQUOTA;
1367 set_qf_name:
1368                         if (sb_any_quota_loaded(sb) &&
1369                             !sbi->s_qf_names[qtype]) {
1370                                 ext4_msg(sb, KERN_ERR,
1371                                        "Cannot change journaled "
1372                                        "quota options when quota turned on");
1373                                 return 0;
1374                         }
1375                         qname = match_strdup(&args[0]);
1376                         if (!qname) {
1377                                 ext4_msg(sb, KERN_ERR,
1378                                         "Not enough memory for "
1379                                         "storing quotafile name");
1380                                 return 0;
1381                         }
1382                         if (sbi->s_qf_names[qtype] &&
1383                             strcmp(sbi->s_qf_names[qtype], qname)) {
1384                                 ext4_msg(sb, KERN_ERR,
1385                                         "%s quota file already "
1386                                         "specified", QTYPE2NAME(qtype));
1387                                 kfree(qname);
1388                                 return 0;
1389                         }
1390                         sbi->s_qf_names[qtype] = qname;
1391                         if (strchr(sbi->s_qf_names[qtype], '/')) {
1392                                 ext4_msg(sb, KERN_ERR,
1393                                         "quotafile must be on "
1394                                         "filesystem root");
1395                                 kfree(sbi->s_qf_names[qtype]);
1396                                 sbi->s_qf_names[qtype] = NULL;
1397                                 return 0;
1398                         }
1399                         set_opt(sbi->s_mount_opt, QUOTA);
1400                         break;
1401                 case Opt_offusrjquota:
1402                         qtype = USRQUOTA;
1403                         goto clear_qf_name;
1404                 case Opt_offgrpjquota:
1405                         qtype = GRPQUOTA;
1406 clear_qf_name:
1407                         if (sb_any_quota_loaded(sb) &&
1408                             sbi->s_qf_names[qtype]) {
1409                                 ext4_msg(sb, KERN_ERR, "Cannot change "
1410                                         "journaled quota options when "
1411                                         "quota turned on");
1412                                 return 0;
1413                         }
1414                         /*
1415                          * The space will be released later when all options
1416                          * are confirmed to be correct
1417                          */
1418                         sbi->s_qf_names[qtype] = NULL;
1419                         break;
1420                 case Opt_jqfmt_vfsold:
1421                         qfmt = QFMT_VFS_OLD;
1422                         goto set_qf_format;
1423                 case Opt_jqfmt_vfsv0:
1424                         qfmt = QFMT_VFS_V0;
1425 set_qf_format:
1426                         if (sb_any_quota_loaded(sb) &&
1427                             sbi->s_jquota_fmt != qfmt) {
1428                                 ext4_msg(sb, KERN_ERR, "Cannot change "
1429                                         "journaled quota options when "
1430                                         "quota turned on");
1431                                 return 0;
1432                         }
1433                         sbi->s_jquota_fmt = qfmt;
1434                         break;
1435                 case Opt_quota:
1436                 case Opt_usrquota:
1437                         set_opt(sbi->s_mount_opt, QUOTA);
1438                         set_opt(sbi->s_mount_opt, USRQUOTA);
1439                         break;
1440                 case Opt_grpquota:
1441                         set_opt(sbi->s_mount_opt, QUOTA);
1442                         set_opt(sbi->s_mount_opt, GRPQUOTA);
1443                         break;
1444                 case Opt_noquota:
1445                         if (sb_any_quota_loaded(sb)) {
1446                                 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1447                                         "options when quota turned on");
1448                                 return 0;
1449                         }
1450                         clear_opt(sbi->s_mount_opt, QUOTA);
1451                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1452                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1453                         break;
1454 #else
1455                 case Opt_quota:
1456                 case Opt_usrquota:
1457                 case Opt_grpquota:
1458                         ext4_msg(sb, KERN_ERR,
1459                                 "quota options not supported");
1460                         break;
1461                 case Opt_usrjquota:
1462                 case Opt_grpjquota:
1463                 case Opt_offusrjquota:
1464                 case Opt_offgrpjquota:
1465                 case Opt_jqfmt_vfsold:
1466                 case Opt_jqfmt_vfsv0:
1467                         ext4_msg(sb, KERN_ERR,
1468                                 "journaled quota options not supported");
1469                         break;
1470                 case Opt_noquota:
1471                         break;
1472 #endif
1473                 case Opt_abort:
1474                         set_opt(sbi->s_mount_opt, ABORT);
1475                         break;
1476                 case Opt_nobarrier:
1477                         clear_opt(sbi->s_mount_opt, BARRIER);
1478                         break;
1479                 case Opt_barrier:
1480                         if (match_int(&args[0], &option)) {
1481                                 set_opt(sbi->s_mount_opt, BARRIER);
1482                                 break;
1483                         }
1484                         if (option)
1485                                 set_opt(sbi->s_mount_opt, BARRIER);
1486                         else
1487                                 clear_opt(sbi->s_mount_opt, BARRIER);
1488                         break;
1489                 case Opt_ignore:
1490                         break;
1491                 case Opt_resize:
1492                         if (!is_remount) {
1493                                 ext4_msg(sb, KERN_ERR,
1494                                         "resize option only available "
1495                                         "for remount");
1496                                 return 0;
1497                         }
1498                         if (match_int(&args[0], &option) != 0)
1499                                 return 0;
1500                         *n_blocks_count = option;
1501                         break;
1502                 case Opt_nobh:
1503                         set_opt(sbi->s_mount_opt, NOBH);
1504                         break;
1505                 case Opt_bh:
1506                         clear_opt(sbi->s_mount_opt, NOBH);
1507                         break;
1508                 case Opt_i_version:
1509                         set_opt(sbi->s_mount_opt, I_VERSION);
1510                         sb->s_flags |= MS_I_VERSION;
1511                         break;
1512                 case Opt_nodelalloc:
1513                         clear_opt(sbi->s_mount_opt, DELALLOC);
1514                         break;
1515                 case Opt_stripe:
1516                         if (match_int(&args[0], &option))
1517                                 return 0;
1518                         if (option < 0)
1519                                 return 0;
1520                         sbi->s_stripe = option;
1521                         break;
1522                 case Opt_delalloc:
1523                         set_opt(sbi->s_mount_opt, DELALLOC);
1524                         break;
1525                 case Opt_block_validity:
1526                         set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1527                         break;
1528                 case Opt_noblock_validity:
1529                         clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
1530                         break;
1531                 case Opt_inode_readahead_blks:
1532                         if (match_int(&args[0], &option))
1533                                 return 0;
1534                         if (option < 0 || option > (1 << 30))
1535                                 return 0;
1536                         if (!is_power_of_2(option)) {
1537                                 ext4_msg(sb, KERN_ERR,
1538                                          "EXT4-fs: inode_readahead_blks"
1539                                          " must be a power of 2");
1540                                 return 0;
1541                         }
1542                         sbi->s_inode_readahead_blks = option;
1543                         break;
1544                 case Opt_journal_ioprio:
1545                         if (match_int(&args[0], &option))
1546                                 return 0;
1547                         if (option < 0 || option > 7)
1548                                 break;
1549                         *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
1550                                                             option);
1551                         break;
1552                 case Opt_noauto_da_alloc:
1553                         set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1554                         break;
1555                 case Opt_auto_da_alloc:
1556                         if (match_int(&args[0], &option)) {
1557                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1558                                 break;
1559                         }
1560                         if (option)
1561                                 clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
1562                         else
1563                                 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1564                         break;
1565                 default:
1566                         ext4_msg(sb, KERN_ERR,
1567                                "Unrecognized mount option \"%s\" "
1568                                "or missing value", p);
1569                         return 0;
1570                 }
1571         }
1572 #ifdef CONFIG_QUOTA
1573         if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1574                 if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
1575                      sbi->s_qf_names[USRQUOTA])
1576                         clear_opt(sbi->s_mount_opt, USRQUOTA);
1577
1578                 if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
1579                      sbi->s_qf_names[GRPQUOTA])
1580                         clear_opt(sbi->s_mount_opt, GRPQUOTA);
1581
1582                 if ((sbi->s_qf_names[USRQUOTA] &&
1583                                 (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
1584                     (sbi->s_qf_names[GRPQUOTA] &&
1585                                 (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
1586                         ext4_msg(sb, KERN_ERR, "old and new quota "
1587                                         "format mixing");
1588                         return 0;
1589                 }
1590
1591                 if (!sbi->s_jquota_fmt) {
1592                         ext4_msg(sb, KERN_ERR, "journaled quota format "
1593                                         "not specified");
1594                         return 0;
1595                 }
1596         } else {
1597                 if (sbi->s_jquota_fmt) {
1598                         ext4_msg(sb, KERN_ERR, "journaled quota format "
1599                                         "specified with no journaling "
1600                                         "enabled");
1601                         return 0;
1602                 }
1603         }
1604 #endif
1605         return 1;
1606 }
1607
1608 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1609                             int read_only)
1610 {
1611         struct ext4_sb_info *sbi = EXT4_SB(sb);
1612         int res = 0;
1613
1614         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1615                 ext4_msg(sb, KERN_ERR, "revision level too high, "
1616                          "forcing read-only mode");
1617                 res = MS_RDONLY;
1618         }
1619         if (read_only)
1620                 return res;
1621         if (!(sbi->s_mount_state & EXT4_VALID_FS))
1622                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1623                          "running e2fsck is recommended");
1624         else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1625                 ext4_msg(sb, KERN_WARNING,
1626                          "warning: mounting fs with errors, "
1627                          "running e2fsck is recommended");
1628         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1629                  le16_to_cpu(es->s_mnt_count) >=
1630                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1631                 ext4_msg(sb, KERN_WARNING,
1632                          "warning: maximal mount count reached, "
1633                          "running e2fsck is recommended");
1634         else if (le32_to_cpu(es->s_checkinterval) &&
1635                 (le32_to_cpu(es->s_lastcheck) +
1636                         le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1637                 ext4_msg(sb, KERN_WARNING,
1638                          "warning: checktime reached, "
1639                          "running e2fsck is recommended");
1640         if (!sbi->s_journal)
1641                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1642         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1643                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1644         le16_add_cpu(&es->s_mnt_count, 1);
1645         es->s_mtime = cpu_to_le32(get_seconds());
1646         ext4_update_dynamic_rev(sb);
1647         if (sbi->s_journal)
1648                 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1649
1650         ext4_commit_super(sb, 1);
1651         if (test_opt(sb, DEBUG))
1652                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1653                                 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
1654                         sb->s_blocksize,
1655                         sbi->s_groups_count,
1656                         EXT4_BLOCKS_PER_GROUP(sb),
1657                         EXT4_INODES_PER_GROUP(sb),
1658                         sbi->s_mount_opt);
1659
1660         if (EXT4_SB(sb)->s_journal) {
1661                 ext4_msg(sb, KERN_INFO, "%s journal on %s",
1662                        EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1663                        "external", EXT4_SB(sb)->s_journal->j_devname);
1664         } else {
1665                 ext4_msg(sb, KERN_INFO, "no journal");
1666         }
1667         return res;
1668 }
1669
1670 static int ext4_fill_flex_info(struct super_block *sb)
1671 {
1672         struct ext4_sb_info *sbi = EXT4_SB(sb);
1673         struct ext4_group_desc *gdp = NULL;
1674         ext4_group_t flex_group_count;
1675         ext4_group_t flex_group;
1676         int groups_per_flex = 0;
1677         size_t size;
1678         int i;
1679
1680         if (!sbi->s_es->s_log_groups_per_flex) {
1681                 sbi->s_log_groups_per_flex = 0;
1682                 return 1;
1683         }
1684
1685         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1686         groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1687
1688         /* We allocate both existing and potentially added groups */
1689         flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1690                         ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1691                               EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1692         size = flex_group_count * sizeof(struct flex_groups);
1693         sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
1694         if (sbi->s_flex_groups == NULL) {
1695                 sbi->s_flex_groups = vmalloc(size);
1696                 if (sbi->s_flex_groups)
1697                         memset(sbi->s_flex_groups, 0, size);
1698         }
1699         if (sbi->s_flex_groups == NULL) {
1700                 ext4_msg(sb, KERN_ERR, "not enough memory for "
1701                                 "%u flex groups", flex_group_count);
1702                 goto failed;
1703         }
1704
1705         for (i = 0; i < sbi->s_groups_count; i++) {
1706                 gdp = ext4_get_group_desc(sb, i, NULL);
1707
1708                 flex_group = ext4_flex_group(sbi, i);
1709                 atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
1710                            ext4_free_inodes_count(sb, gdp));
1711                 atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
1712                            ext4_free_blks_count(sb, gdp));
1713                 atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
1714                            ext4_used_dirs_count(sb, gdp));
1715         }
1716
1717         return 1;
1718 failed:
1719         return 0;
1720 }
1721
1722 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1723                             struct ext4_group_desc *gdp)
1724 {
1725         __u16 crc = 0;
1726
1727         if (sbi->s_es->s_feature_ro_compat &
1728             cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1729                 int offset = offsetof(struct ext4_group_desc, bg_checksum);
1730                 __le32 le_group = cpu_to_le32(block_group);
1731
1732                 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1733                 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1734                 crc = crc16(crc, (__u8 *)gdp, offset);
1735                 offset += sizeof(gdp->bg_checksum); /* skip checksum */
1736                 /* for checksum of struct ext4_group_desc do the rest...*/
1737                 if ((sbi->s_es->s_feature_incompat &
1738                      cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1739                     offset < le16_to_cpu(sbi->s_es->s_desc_size))
1740                         crc = crc16(crc, (__u8 *)gdp + offset,
1741                                     le16_to_cpu(sbi->s_es->s_desc_size) -
1742                                         offset);
1743         }
1744
1745         return cpu_to_le16(crc);
1746 }
1747
1748 int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
1749                                 struct ext4_group_desc *gdp)
1750 {
1751         if ((sbi->s_es->s_feature_ro_compat &
1752              cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) &&
1753             (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp)))
1754                 return 0;
1755
1756         return 1;
1757 }
1758
1759 /* Called at mount-time, super-block is locked */
1760 static int ext4_check_descriptors(struct super_block *sb)
1761 {
1762         struct ext4_sb_info *sbi = EXT4_SB(sb);
1763         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1764         ext4_fsblk_t last_block;
1765         ext4_fsblk_t block_bitmap;
1766         ext4_fsblk_t inode_bitmap;
1767         ext4_fsblk_t inode_table;
1768         int flexbg_flag = 0;
1769         ext4_group_t i;
1770
1771         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1772                 flexbg_flag = 1;
1773
1774         ext4_debug("Checking group descriptors");
1775
1776         for (i = 0; i < sbi->s_groups_count; i++) {
1777                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1778
1779                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
1780                         last_block = ext4_blocks_count(sbi->s_es) - 1;
1781                 else
1782                         last_block = first_block +
1783                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1784
1785                 block_bitmap = ext4_block_bitmap(sb, gdp);
1786                 if (block_bitmap < first_block || block_bitmap > last_block) {
1787                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1788                                "Block bitmap for group %u not in group "
1789                                "(block %llu)!", i, block_bitmap);
1790                         return 0;
1791                 }
1792                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1793                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1794                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1795                                "Inode bitmap for group %u not in group "
1796                                "(block %llu)!", i, inode_bitmap);
1797                         return 0;
1798                 }
1799                 inode_table = ext4_inode_table(sb, gdp);
1800                 if (inode_table < first_block ||
1801                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
1802                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1803                                "Inode table for group %u not in group "
1804                                "(block %llu)!", i, inode_table);
1805                         return 0;
1806                 }
1807                 ext4_lock_group(sb, i);
1808                 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1809                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
1810                                  "Checksum for group %u failed (%u!=%u)",
1811                                  i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1812                                      gdp)), le16_to_cpu(gdp->bg_checksum));
1813                         if (!(sb->s_flags & MS_RDONLY)) {
1814                                 ext4_unlock_group(sb, i);
1815                                 return 0;
1816                         }
1817                 }
1818                 ext4_unlock_group(sb, i);
1819                 if (!flexbg_flag)
1820                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
1821         }
1822
1823         ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1824         sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
1825         return 1;
1826 }
1827
1828 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1829  * the superblock) which were deleted from all directories, but held open by
1830  * a process at the time of a crash.  We walk the list and try to delete these
1831  * inodes at recovery time (only with a read-write filesystem).
1832  *
1833  * In order to keep the orphan inode chain consistent during traversal (in
1834  * case of crash during recovery), we link each inode into the superblock
1835  * orphan list_head and handle it the same way as an inode deletion during
1836  * normal operation (which journals the operations for us).
1837  *
1838  * We only do an iget() and an iput() on each inode, which is very safe if we
1839  * accidentally point at an in-use or already deleted inode.  The worst that
1840  * can happen in this case is that we get a "bit already cleared" message from
1841  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1842  * e2fsck was run on this filesystem, and it must have already done the orphan
1843  * inode cleanup for us, so we can safely abort without any further action.
1844  */
1845 static void ext4_orphan_cleanup(struct super_block *sb,
1846                                 struct ext4_super_block *es)
1847 {
1848         unsigned int s_flags = sb->s_flags;
1849         int nr_orphans = 0, nr_truncates = 0;
1850 #ifdef CONFIG_QUOTA
1851         int i;
1852 #endif
1853         if (!es->s_last_orphan) {
1854                 jbd_debug(4, "no orphan inodes to clean up\n");
1855                 return;
1856         }
1857
1858         if (bdev_read_only(sb->s_bdev)) {
1859                 ext4_msg(sb, KERN_ERR, "write access "
1860                         "unavailable, skipping orphan cleanup");
1861                 return;
1862         }
1863
1864         if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1865                 if (es->s_last_orphan)
1866                         jbd_debug(1, "Errors on filesystem, "
1867                                   "clearing orphan list.\n");
1868                 es->s_last_orphan = 0;
1869                 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1870                 return;
1871         }
1872
1873         if (s_flags & MS_RDONLY) {
1874                 ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1875                 sb->s_flags &= ~MS_RDONLY;
1876         }
1877 #ifdef CONFIG_QUOTA
1878         /* Needed for iput() to work correctly and not trash data */
1879         sb->s_flags |= MS_ACTIVE;
1880         /* Turn on quotas so that they are updated correctly */
1881         for (i = 0; i < MAXQUOTAS; i++) {
1882                 if (EXT4_SB(sb)->s_qf_names[i]) {
1883                         int ret = ext4_quota_on_mount(sb, i);
1884                         if (ret < 0)
1885                                 ext4_msg(sb, KERN_ERR,
1886                                         "Cannot turn on journaled "
1887                                         "quota: error %d", ret);
1888                 }
1889         }
1890 #endif
1891
1892         while (es->s_last_orphan) {
1893                 struct inode *inode;
1894
1895                 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1896                 if (IS_ERR(inode)) {
1897                         es->s_last_orphan = 0;
1898                         break;
1899                 }
1900
1901                 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1902                 vfs_dq_init(inode);
1903                 if (inode->i_nlink) {
1904                         ext4_msg(sb, KERN_DEBUG,
1905                                 "%s: truncating inode %lu to %lld bytes",
1906                                 __func__, inode->i_ino, inode->i_size);
1907                         jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1908                                   inode->i_ino, inode->i_size);
1909                         ext4_truncate(inode);
1910                         nr_truncates++;
1911                 } else {
1912                         ext4_msg(sb, KERN_DEBUG,
1913                                 "%s: deleting unreferenced inode %lu",
1914                                 __func__, inode->i_ino);
1915                         jbd_debug(2, "deleting unreferenced inode %lu\n",
1916                                   inode->i_ino);
1917                         nr_orphans++;
1918                 }
1919                 iput(inode);  /* The delete magic happens here! */
1920         }
1921
1922 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1923
1924         if (nr_orphans)
1925                 ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1926                        PLURAL(nr_orphans));
1927         if (nr_truncates)
1928                 ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1929                        PLURAL(nr_truncates));
1930 #ifdef CONFIG_QUOTA
1931         /* Turn quotas off */
1932         for (i = 0; i < MAXQUOTAS; i++) {
1933                 if (sb_dqopt(sb)->files[i])
1934                         vfs_quota_off(sb, i, 0);
1935         }
1936 #endif
1937         sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1938 }
1939
1940 /*
1941  * Maximal extent format file size.
1942  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1943  * extent format containers, within a sector_t, and within i_blocks
1944  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1945  * so that won't be a limiting factor.
1946  *
1947  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1948  */
1949 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1950 {
1951         loff_t res;
1952         loff_t upper_limit = MAX_LFS_FILESIZE;
1953
1954         /* small i_blocks in vfs inode? */
1955         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1956                 /*
1957                  * CONFIG_LBD is not enabled implies the inode
1958                  * i_block represent total blocks in 512 bytes
1959                  * 32 == size of vfs inode i_blocks * 8
1960                  */
1961                 upper_limit = (1LL << 32) - 1;
1962
1963                 /* total blocks in file system block size */
1964                 upper_limit >>= (blkbits - 9);
1965                 upper_limit <<= blkbits;
1966         }
1967
1968         /* 32-bit extent-start container, ee_block */
1969         res = 1LL << 32;
1970         res <<= blkbits;
1971         res -= 1;
1972
1973         /* Sanity check against vm- & vfs- imposed limits */
1974         if (res > upper_limit)
1975                 res = upper_limit;
1976
1977         return res;
1978 }
1979
1980 /*
1981  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1982  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1983  * We need to be 1 filesystem block less than the 2^48 sector limit.
1984  */
1985 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1986 {
1987         loff_t res = EXT4_NDIR_BLOCKS;
1988         int meta_blocks;
1989         loff_t upper_limit;
1990         /* This is calculated to be the largest file size for a dense, block
1991          * mapped file such that the file's total number of 512-byte sectors,
1992          * including data and all indirect blocks, does not exceed (2^48 - 1).
1993          *
1994          * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
1995          * number of 512-byte sectors of the file.
1996          */
1997
1998         if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1999                 /*
2000                  * !has_huge_files or CONFIG_LBD not enabled implies that
2001                  * the inode i_block field represents total file blocks in
2002                  * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
2003                  */
2004                 upper_limit = (1LL << 32) - 1;
2005
2006                 /* total blocks in file system block size */
2007                 upper_limit >>= (bits - 9);
2008
2009         } else {
2010                 /*
2011                  * We use 48 bit ext4_inode i_blocks
2012                  * With EXT4_HUGE_FILE_FL set the i_blocks
2013                  * represent total number of blocks in
2014                  * file system block size
2015                  */
2016                 upper_limit = (1LL << 48) - 1;
2017
2018         }
2019
2020         /* indirect blocks */
2021         meta_blocks = 1;
2022         /* double indirect blocks */
2023         meta_blocks += 1 + (1LL << (bits-2));
2024         /* tripple indirect blocks */
2025         meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
2026
2027         upper_limit -= meta_blocks;
2028         upper_limit <<= bits;
2029
2030         res += 1LL << (bits-2);
2031         res += 1LL << (2*(bits-2));
2032         res += 1LL << (3*(bits-2));
2033         res <<= bits;
2034         if (res > upper_limit)
2035                 res = upper_limit;
2036
2037         if (res > MAX_LFS_FILESIZE)
2038                 res = MAX_LFS_FILESIZE;
2039
2040         return res;
2041 }
2042
2043 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2044                                    ext4_fsblk_t logical_sb_block, int nr)
2045 {
2046         struct ext4_sb_info *sbi = EXT4_SB(sb);
2047         ext4_group_t bg, first_meta_bg;
2048         int has_super = 0;
2049
2050         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
2051
2052         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
2053             nr < first_meta_bg)
2054                 return logical_sb_block + nr + 1;
2055         bg = sbi->s_desc_per_block * nr;
2056         if (ext4_bg_has_super(sb, bg))
2057                 has_super = 1;
2058
2059         return (has_super + ext4_group_first_block_no(sb, bg));
2060 }
2061
2062 /**
2063  * ext4_get_stripe_size: Get the stripe size.
2064  * @sbi: In memory super block info
2065  *
2066  * If we have specified it via mount option, then
2067  * use the mount option value. If the value specified at mount time is
2068  * greater than the blocks per group use the super block value.
2069  * If the super block value is greater than blocks per group return 0.
2070  * Allocator needs it be less than blocks per group.
2071  *
2072  */
2073 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
2074 {
2075         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
2076         unsigned long stripe_width =
2077                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
2078
2079         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
2080                 return sbi->s_stripe;
2081
2082         if (stripe_width <= sbi->s_blocks_per_group)
2083                 return stripe_width;
2084
2085         if (stride <= sbi->s_blocks_per_group)
2086                 return stride;
2087
2088         return 0;
2089 }
2090
/* sysfs support */
2092
2093 struct ext4_attr {
2094         struct attribute attr;
2095         ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
2096         ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
2097                          const char *, size_t);
2098         int offset;
2099 };
2100
2101 static int parse_strtoul(const char *buf,
2102                 unsigned long max, unsigned long *value)
2103 {
2104         char *endp;
2105
2106         while (*buf && isspace(*buf))
2107                 buf++;
2108         *value = simple_strtoul(buf, &endp, 0);
2109         while (*endp && isspace(*endp))
2110                 endp++;
2111         if (*endp || *value > max)
2112                 return -EINVAL;
2113
2114         return 0;
2115 }
2116
2117 static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2118                                               struct ext4_sb_info *sbi,
2119                                               char *buf)
2120 {
2121         return snprintf(buf, PAGE_SIZE, "%llu\n",
2122                         (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
2123 }
2124
2125 static ssize_t session_write_kbytes_show(struct ext4_attr *a,
2126                                          struct ext4_sb_info *sbi, char *buf)
2127 {
2128         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2129
2130         return snprintf(buf, PAGE_SIZE, "%lu\n",
2131                         (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2132                          sbi->s_sectors_written_start) >> 1);
2133 }
2134
2135 static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2136                                           struct ext4_sb_info *sbi, char *buf)
2137 {
2138         struct super_block *sb = sbi->s_buddy_cache->i_sb;
2139
2140         return snprintf(buf, PAGE_SIZE, "%llu\n",
2141                         sbi->s_kbytes_written +
2142                         ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2143                           EXT4_SB(sb)->s_sectors_written_start) >> 1));
2144 }
2145
2146 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
2147                                           struct ext4_sb_info *sbi,
2148                                           const char *buf, size_t count)
2149 {
2150         unsigned long t;
2151
2152         if (parse_strtoul(buf, 0x40000000, &t))
2153                 return -EINVAL;
2154
2155         if (!is_power_of_2(t))
2156                 return -EINVAL;
2157
2158         sbi->s_inode_readahead_blks = t;
2159         return count;
2160 }
2161
2162 static ssize_t sbi_ui_show(struct ext4_attr *a,
2163                            struct ext4_sb_info *sbi, char *buf)
2164 {
2165         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2166
2167         return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2168 }
2169
2170 static ssize_t sbi_ui_store(struct ext4_attr *a,
2171                             struct ext4_sb_info *sbi,
2172                             const char *buf, size_t count)
2173 {
2174         unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
2175         unsigned long t;
2176
2177         if (parse_strtoul(buf, 0xffffffff, &t))
2178                 return -EINVAL;
2179         *ui = t;
2180         return count;
2181 }
2182
/*
 * Helpers for declaring ext4 sysfs attributes.  Each one defines a static
 * struct ext4_attr named ext4_attr_<name>.
 */

/* Attribute backed by the ext4_sb_info field _elname (offset recorded). */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
static struct ext4_attr ext4_attr_##_name = {                   \
        .attr = {.name = __stringify(_name), .mode = _mode },   \
        .show   = _show,                                        \
        .store  = _store,                                       \
        .offset = offsetof(struct ext4_sb_info, _elname),       \
}

/* Attribute with explicit show/store callbacks and no backing offset. */
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)

/* Read-only (0444) attribute served by <name>_show. */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)

/* Read-write (0644) attribute served by <name>_show / <name>_store. */
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)

/* Read-write (0644) unsigned int field of ext4_sb_info, generic handlers. */
#define EXT4_RW_ATTR_SBI_UI(name, elname)       \
        EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)

/* Address of the embedded struct attribute, for attribute arrays. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
2198
2199 EXT4_RO_ATTR(delayed_allocation_blocks);
2200 EXT4_RO_ATTR(session_write_kbytes);
2201 EXT4_RO_ATTR(lifetime_write_kbytes);
2202 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
2203                  inode_readahead_blks_store, s_inode_readahead_blks);
2204 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
2205 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
2206 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
2207 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2208 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2209 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2210
2211 static struct attribute *ext4_attrs[] = {
2212         ATTR_LIST(delayed_allocation_blocks),
2213         ATTR_LIST(session_write_kbytes),
2214         ATTR_LIST(lifetime_write_kbytes),
2215         ATTR_LIST(inode_readahead_blks),
2216         ATTR_LIST(mb_stats),
2217         ATTR_LIST(mb_max_to_scan),
2218         ATTR_LIST(mb_min_to_scan),
2219         ATTR_LIST(mb_order2_req),
2220         ATTR_LIST(mb_stream_req),
2221         ATTR_LIST(mb_group_prealloc),
2222         NULL,
2223 };
2224
2225 static ssize_t ext4_attr_show(struct kobject *kobj,
2226                               struct attribute *attr, char *buf)
2227 {
2228         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2229                                                 s_kobj);
2230         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2231
2232         return a->show ? a->show(a, sbi, buf) : 0;
2233 }
2234
2235 static ssize_t ext4_attr_store(struct kobject *kobj,
2236                                struct attribute *attr,
2237                                const char *buf, size_t len)
2238 {
2239         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2240                                                 s_kobj);
2241         struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
2242
2243         return a->store ? a->store(a, sbi, buf, len) : 0;
2244 }
2245
2246 static void ext4_sb_release(struct kobject *kobj)
2247 {
2248         struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
2249                                                 s_kobj);
2250         complete(&sbi->s_kobj_unregister);
2251 }
2252
2253
2254 static struct sysfs_ops ext4_attr_ops = {
2255         .show   = ext4_attr_show,
2256         .store  = ext4_attr_store,
2257 };
2258
2259 static struct kobj_type ext4_ktype = {
2260         .default_attrs  = ext4_attrs,
2261         .sysfs_ops      = &ext4_attr_ops,
2262         .release        = ext4_sb_release,
2263 };
2264
2265 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2266                                 __releases(kernel_lock)
2267                                 __acquires(kernel_lock)
2268 {
2269         struct buffer_head *bh;
2270         struct ext4_super_block *es = NULL;
2271         struct ext4_sb_info *sbi;
2272         ext4_fsblk_t block;
2273         ext4_fsblk_t sb_block = get_sb_block(&data);
2274         ext4_fsblk_t logical_sb_block;
2275         unsigned long offset = 0;
2276         unsigned long journal_devnum = 0;
2277         unsigned long def_mount_opts;
2278         struct inode *root;
2279         char *cp;
2280         const char *descr;
2281         int ret = -EINVAL;
2282         int blocksize;
2283         unsigned int db_count;
2284         unsigned int i;
2285         int needs_recovery, has_huge_files;
2286         int features;
2287         __u64 blocks_count;
2288         int err;
2289         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2290
2291         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2292         if (!sbi)
2293                 return -ENOMEM;
2294
2295         sbi->s_blockgroup_lock =
2296                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
2297         if (!sbi->s_blockgroup_lock) {
2298                 kfree(sbi);
2299                 return -ENOMEM;
2300         }
2301         sb->s_fs_info = sbi;
2302         sbi->s_mount_opt = 0;
2303         sbi->s_resuid = EXT4_DEF_RESUID;
2304         sbi->s_resgid = EXT4_DEF_RESGID;
2305         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2306         sbi->s_sb_block = sb_block;
2307         sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
2308                                                       sectors[1]);
2309
2310         unlock_kernel();
2311
2312         /* Cleanup superblock name */
2313         for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2314                 *cp = '!';
2315
2316         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2317         if (!blocksize) {
2318                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
2319                 goto out_fail;
2320         }
2321
2322         /*
2323          * The ext4 superblock will not be buffer aligned for other than 1kB
2324          * block sizes.  We need to calculate the offset from buffer start.
2325          */
2326         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2327                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2328                 offset = do_div(logical_sb_block, blocksize);
2329         } else {
2330                 logical_sb_block = sb_block;
2331         }
2332
2333         if (!(bh = sb_bread(sb, logical_sb_block))) {
2334                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
2335                 goto out_fail;
2336         }
2337         /*
2338          * Note: s_es must be initialized as soon as possible because
2339          *       some ext4 macro-instructions depend on its value
2340          */
2341         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2342         sbi->s_es = es;
2343         sb->s_magic = le16_to_cpu(es->s_magic);
2344         if (sb->s_magic != EXT4_SUPER_MAGIC)
2345                 goto cantfind_ext4;
2346         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
2347
2348         /* Set defaults before we parse the mount options */
2349         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2350         if (def_mount_opts & EXT4_DEFM_DEBUG)
2351                 set_opt(sbi->s_mount_opt, DEBUG);
2352         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2353                 set_opt(sbi->s_mount_opt, GRPID);
2354         if (def_mount_opts & EXT4_DEFM_UID16)
2355                 set_opt(sbi->s_mount_opt, NO_UID32);
2356 #ifdef CONFIG_EXT4_FS_XATTR
2357         if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2358                 set_opt(sbi->s_mount_opt, XATTR_USER);
2359 #endif
2360 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2361         if (def_mount_opts & EXT4_DEFM_ACL)
2362                 set_opt(sbi->s_mount_opt, POSIX_ACL);
2363 #endif
2364         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2365                 sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2366         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2367                 sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2368         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2369                 sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2370
2371         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2372                 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2373         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2374                 set_opt(sbi->s_mount_opt, ERRORS_CONT);
2375         else
2376                 set_opt(sbi->s_mount_opt, ERRORS_RO);
2377
2378         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2379         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2380         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2381         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2382         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2383         sbi->s_mb_history_max = default_mb_history_length;
2384
2385         set_opt(sbi->s_mount_opt, BARRIER);
2386
2387         /*
2388          * enable delayed allocation by default
2389          * Use -o nodelalloc to turn it off
2390          */
2391         set_opt(sbi->s_mount_opt, DELALLOC);
2392
2393         if (!parse_options((char *) data, sb, &journal_devnum,
2394                            &journal_ioprio, NULL, 0))
2395                 goto failed_mount;
2396
2397         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2398                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2399
2400         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2401             (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2402              EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2403              EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2404                 ext4_msg(sb, KERN_WARNING,
2405                        "feature flags set on rev 0 fs, "
2406                        "running e2fsck is recommended");
2407
2408         /*
2409          * Check feature flags regardless of the revision level, since we
2410          * previously didn't change the revision level when setting the flags,
2411          * so there is a chance incompat flags are set on a rev 0 filesystem.
2412          */
2413         features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2414         if (features) {
2415                 ext4_msg(sb, KERN_ERR,
2416                         "Couldn't mount because of "
2417                         "unsupported optional features (%x)",
2418                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2419                         ~EXT4_FEATURE_INCOMPAT_SUPP));
2420                 goto failed_mount;
2421         }
2422         features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2423         if (!(sb->s_flags & MS_RDONLY) && features) {
2424                 ext4_msg(sb, KERN_ERR,
2425                         "Couldn't mount RDWR because of "
2426                         "unsupported optional features (%x)",
2427                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2428                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
2429                 goto failed_mount;
2430         }
2431         has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2432                                     EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2433         if (has_huge_files) {
2434                 /*
2435                  * Large file size enabled file system can only be
2436                  * mount if kernel is build with CONFIG_LBD
2437                  */
2438                 if (sizeof(root->i_blocks) < sizeof(u64) &&
2439                                 !(sb->s_flags & MS_RDONLY)) {
2440                         ext4_msg(sb, KERN_ERR, "Filesystem with huge "
2441                                         "files cannot be mounted read-write "
2442                                         "without CONFIG_LBD");
2443                         goto failed_mount;
2444                 }
2445         }
2446         blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2447
2448         if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2449             blocksize > EXT4_MAX_BLOCK_SIZE) {
2450                 ext4_msg(sb, KERN_ERR,
2451                        "Unsupported filesystem blocksize %d", blocksize);
2452                 goto failed_mount;
2453         }
2454
2455         if (sb->s_blocksize != blocksize) {
2456                 /* Validate the filesystem blocksize */
2457                 if (!sb_set_blocksize(sb, blocksize)) {
2458                         ext4_msg(sb, KERN_ERR, "bad block size %d",
2459                                         blocksize);
2460                         goto failed_mount;
2461                 }
2462
2463                 brelse(bh);
2464                 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2465                 offset = do_div(logical_sb_block, blocksize);
2466                 bh = sb_bread(sb, logical_sb_block);
2467                 if (!bh) {
2468                         ext4_msg(sb, KERN_ERR,
2469                                "Can't read superblock on 2nd try");
2470                         goto failed_mount;
2471                 }
2472                 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2473                 sbi->s_es = es;
2474                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2475                         ext4_msg(sb, KERN_ERR,
2476                                "Magic mismatch, very weird!");
2477                         goto failed_mount;
2478                 }
2479         }
2480
2481         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2482                                                       has_huge_files);
2483         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2484
2485         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2486                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2487                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2488         } else {
2489                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2490                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2491                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2492                     (!is_power_of_2(sbi->s_inode_size)) ||
2493                     (sbi->s_inode_size > blocksize)) {
2494                         ext4_msg(sb, KERN_ERR,
2495                                "unsupported inode size: %d",
2496                                sbi->s_inode_size);
2497                         goto failed_mount;
2498                 }
2499                 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2500                         sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2501         }
2502
2503         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2504         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2505                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2506                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2507                     !is_power_of_2(sbi->s_desc_size)) {
2508                         ext4_msg(sb, KERN_ERR,
2509                                "unsupported descriptor size %lu",
2510                                sbi->s_desc_size);
2511                         goto failed_mount;
2512                 }
2513         } else
2514                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2515
2516         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2517         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2518         if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2519                 goto cantfind_ext4;
2520
2521         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2522         if (sbi->s_inodes_per_block == 0)
2523                 goto cantfind_ext4;
2524         sbi->s_itb_per_group = sbi->s_inodes_per_group /
2525                                         sbi->s_inodes_per_block;
2526         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2527         sbi->s_sbh = bh;
2528         sbi->s_mount_state = le16_to_cpu(es->s_state);
2529         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2530         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2531
2532         for (i = 0; i < 4; i++)
2533                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2534         sbi->s_def_hash_version = es->s_def_hash_version;
2535         i = le32_to_cpu(es->s_flags);
2536         if (i & EXT2_FLAGS_UNSIGNED_HASH)
2537                 sbi->s_hash_unsigned = 3;
2538         else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2539 #ifdef __CHAR_UNSIGNED__
2540                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2541                 sbi->s_hash_unsigned = 3;
2542 #else
2543                 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2544 #endif
2545                 sb->s_dirt = 1;
2546         }
2547
2548         if (sbi->s_blocks_per_group > blocksize * 8) {
2549                 ext4_msg(sb, KERN_ERR,
2550                        "#blocks per group too big: %lu",
2551                        sbi->s_blocks_per_group);
2552                 goto failed_mount;
2553         }
2554         if (sbi->s_inodes_per_group > blocksize * 8) {
2555                 ext4_msg(sb, KERN_ERR,
2556                        "#inodes per group too big: %lu",
2557                        sbi->s_inodes_per_group);
2558                 goto failed_mount;
2559         }
2560
2561         if (ext4_blocks_count(es) >
2562                     (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2563                 ext4_msg(sb, KERN_ERR, "filesystem"
2564                         " too large to mount safely");
2565                 if (sizeof(sector_t) < 8)
2566                         ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled");
2567                 goto failed_mount;
2568         }
2569
2570         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2571                 goto cantfind_ext4;
2572
2573         /* check blocks count against device size */
2574         blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2575         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2576                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
2577                        "exceeds size of device (%llu blocks)",
2578                        ext4_blocks_count(es), blocks_count);
2579                 goto failed_mount;
2580         }
2581
2582         /*
2583          * It makes no sense for the first data block to be beyond the end
2584          * of the filesystem.
2585          */
2586         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2587                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
2588                          "block %u is beyond end of filesystem (%llu)",
2589                          le32_to_cpu(es->s_first_data_block),
2590                          ext4_blocks_count(es));
2591                 goto failed_mount;
2592         }
2593         blocks_count = (ext4_blocks_count(es) -
2594                         le32_to_cpu(es->s_first_data_block) +
2595                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
2596         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2597         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2598                 ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
2599                        "(block count %llu, first data block %u, "
2600                        "blocks per group %lu)", sbi->s_groups_count,
2601                        ext4_blocks_count(es),
2602                        le32_to_cpu(es->s_first_data_block),
2603                        EXT4_BLOCKS_PER_GROUP(sb));
2604                 goto failed_mount;
2605         }
2606         sbi->s_groups_count = blocks_count;
2607         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2608                    EXT4_DESC_PER_BLOCK(sb);
2609         sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2610                                     GFP_KERNEL);
2611         if (sbi->s_group_desc == NULL) {
2612                 ext4_msg(sb, KERN_ERR, "not enough memory");
2613                 goto failed_mount;
2614         }
2615
2616 #ifdef CONFIG_PROC_FS
2617         if (ext4_proc_root)
2618                 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2619 #endif
2620
2621         bgl_lock_init(sbi->s_blockgroup_lock);
2622
2623         for (i = 0; i < db_count; i++) {
2624                 block = descriptor_loc(sb, logical_sb_block, i);
2625                 sbi->s_group_desc[i] = sb_bread(sb, block);
2626                 if (!sbi->s_group_desc[i]) {
2627                         ext4_msg(sb, KERN_ERR,
2628                                "can't read group descriptor %d", i);
2629                         db_count = i;
2630                         goto failed_mount2;
2631                 }
2632         }
2633         if (!ext4_check_descriptors(sb)) {
2634                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
2635                 goto failed_mount2;
2636         }
2637         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2638                 if (!ext4_fill_flex_info(sb)) {
2639                         ext4_msg(sb, KERN_ERR,
2640                                "unable to initialize "
2641                                "flex_bg meta info!");
2642                         goto failed_mount2;
2643                 }
2644
2645         sbi->s_gdb_count = db_count;
2646         get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2647         spin_lock_init(&sbi->s_next_gen_lock);
2648
2649         err = percpu_counter_init(&sbi->s_freeblocks_counter,
2650                         ext4_count_free_blocks(sb));
2651         if (!err) {
2652                 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2653                                 ext4_count_free_inodes(sb));
2654         }
2655         if (!err) {
2656                 err = percpu_counter_init(&sbi->s_dirs_counter,
2657                                 ext4_count_dirs(sb));
2658         }
2659         if (!err) {
2660                 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2661         }
2662         if (err) {
2663                 ext4_msg(sb, KERN_ERR, "insufficient memory");
2664                 goto failed_mount3;
2665         }
2666
2667         sbi->s_stripe = ext4_get_stripe_size(sbi);
2668
2669         /*
2670          * set up enough so that it can read an inode
2671          */
2672         if (!test_opt(sb, NOLOAD) &&
2673             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
2674                 sb->s_op = &ext4_sops;
2675         else
2676                 sb->s_op = &ext4_nojournal_sops;
2677         sb->s_export_op = &ext4_export_ops;
2678         sb->s_xattr = ext4_xattr_handlers;
2679 #ifdef CONFIG_QUOTA
2680         sb->s_qcop = &ext4_qctl_operations;
2681         sb->dq_op = &ext4_quota_operations;
2682 #endif
2683         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2684         mutex_init(&sbi->s_orphan_lock);
2685         mutex_init(&sbi->s_resize_lock);
2686
2687         sb->s_root = NULL;
2688
2689         needs_recovery = (es->s_last_orphan != 0 ||
2690                           EXT4_HAS_INCOMPAT_FEATURE(sb,
2691                                     EXT4_FEATURE_INCOMPAT_RECOVER));
2692
2693         /*
2694          * The first inode we look at is the journal inode.  Don't try
2695          * root first: it may be modified in the journal!
2696          */
2697         if (!test_opt(sb, NOLOAD) &&
2698             EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2699                 if (ext4_load_journal(sb, es, journal_devnum))
2700                         goto failed_mount3;
2701                 if (!(sb->s_flags & MS_RDONLY) &&
2702                     EXT4_SB(sb)->s_journal->j_failed_commit) {
2703                         ext4_msg(sb, KERN_CRIT, "error: "
2704                                "ext4_fill_super: Journal transaction "
2705                                "%u is corrupt",
2706                                EXT4_SB(sb)->s_journal->j_failed_commit);
2707                         if (test_opt(sb, ERRORS_RO)) {
2708                                 ext4_msg(sb, KERN_CRIT,
2709                                        "Mounting filesystem read-only");
2710                                 sb->s_flags |= MS_RDONLY;
2711                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2712                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2713                         }
2714                         if (test_opt(sb, ERRORS_PANIC)) {
2715                                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2716                                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2717                                 ext4_commit_super(sb, 1);
2718                                 goto failed_mount4;
2719                         }
2720                 }
2721         } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2722               EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2723                 ext4_msg(sb, KERN_ERR, "required journal recovery "
2724                        "suppressed and not mounted read-only");
2725                 goto failed_mount4;
2726         } else {
2727                 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2728                 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2729                 sbi->s_journal = NULL;
2730                 needs_recovery = 0;
2731                 goto no_journal;
2732         }
2733
2734         if (ext4_blocks_count(es) > 0xffffffffULL &&
2735             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2736                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
2737                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
2738                 goto failed_mount4;
2739         }
2740
2741         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2742                 jbd2_journal_set_features(sbi->s_journal,
2743                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2744                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2745         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2746                 jbd2_journal_set_features(sbi->s_journal,
2747                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2748                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2749                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2750         } else {
2751                 jbd2_journal_clear_features(sbi->s_journal,
2752                                 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2753                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2754         }
2755
2756         /* We have now updated the journal if required, so we can
2757          * validate the data journaling mode. */
2758         switch (test_opt(sb, DATA_FLAGS)) {
2759         case 0:
2760                 /* No mode set, assume a default based on the journal
2761                  * capabilities: ORDERED_DATA if the journal can
2762                  * cope, else JOURNAL_DATA
2763                  */
2764                 if (jbd2_journal_check_available_features
2765                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2766                         set_opt(sbi->s_mount_opt, ORDERED_DATA);
2767                 else
2768                         set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2769                 break;
2770
2771         case EXT4_MOUNT_ORDERED_DATA:
2772         case EXT4_MOUNT_WRITEBACK_DATA:
2773                 if (!jbd2_journal_check_available_features
2774                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2775                         ext4_msg(sb, KERN_ERR, "Journal does not support "
2776                                "requested data journaling mode");
2777                         goto failed_mount4;
2778                 }
2779         default:
2780                 break;
2781         }
2782         set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2783
2784 no_journal:
2785
2786         if (test_opt(sb, NOBH)) {
2787                 if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2788                         ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
2789                                 "its supported only with writeback mode");
2790                         clear_opt(sbi->s_mount_opt, NOBH);
2791                 }
2792         }
2793         /*
2794          * The jbd2_journal_load will have done any necessary log recovery,
2795          * so we can safely mount the rest of the filesystem now.
2796          */
2797
2798         root = ext4_iget(sb, EXT4_ROOT_INO);
2799         if (IS_ERR(root)) {
2800                 ext4_msg(sb, KERN_ERR, "get root inode failed");
2801                 ret = PTR_ERR(root);
2802                 goto failed_mount4;
2803         }
2804         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2805                 iput(root);
2806                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
2807                 goto failed_mount4;
2808         }
2809         sb->s_root = d_alloc_root(root);
2810         if (!sb->s_root) {
2811                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
2812                 iput(root);
2813                 ret = -ENOMEM;
2814                 goto failed_mount4;
2815         }
2816
2817         ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2818
2819         /* determine the minimum size of new large inodes, if present */
2820         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2821                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2822                                                      EXT4_GOOD_OLD_INODE_SIZE;
2823                 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2824                                        EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2825                         if (sbi->s_want_extra_isize <
2826                             le16_to_cpu(es->s_want_extra_isize))
2827                                 sbi->s_want_extra_isize =
2828                                         le16_to_cpu(es->s_want_extra_isize);
2829                         if (sbi->s_want_extra_isize <
2830                             le16_to_cpu(es->s_min_extra_isize))
2831                                 sbi->s_want_extra_isize =
2832                                         le16_to_cpu(es->s_min_extra_isize);
2833                 }
2834         }
2835         /* Check if enough inode space is available */
2836         if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2837                                                         sbi->s_inode_size) {
2838                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2839                                                        EXT4_GOOD_OLD_INODE_SIZE;
2840                 ext4_msg(sb, KERN_INFO, "required extra inode space not"
2841                          "available");
2842         }
2843
2844         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2845                 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
2846                          "requested data journaling mode");
2847                 clear_opt(sbi->s_mount_opt, DELALLOC);
2848         } else if (test_opt(sb, DELALLOC))
2849                 ext4_msg(sb, KERN_INFO, "delayed allocation enabled");
2850
2851         err = ext4_setup_system_zone(sb);
2852         if (err) {
2853                 ext4_msg(sb, KERN_ERR, "failed to initialize system "
2854                          "zone (%d)\n", err);
2855                 goto failed_mount4;
2856         }
2857
2858         ext4_ext_init(sb);
2859         err = ext4_mb_init(sb, needs_recovery);
2860         if (err) {
2861                 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
2862                          err);
2863                 goto failed_mount4;
2864         }
2865
2866         sbi->s_kobj.kset = ext4_kset;
2867         init_completion(&sbi->s_kobj_unregister);
2868         err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
2869                                    "%s", sb->s_id);
2870         if (err) {
2871                 ext4_mb_release(sb);
2872                 ext4_ext_release(sb);
2873                 goto failed_mount4;
2874         };
2875
2876         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2877         ext4_orphan_cleanup(sb, es);
2878         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2879         if (needs_recovery) {
2880                 ext4_msg(sb, KERN_INFO, "recovery complete");
2881                 ext4_mark_recovery_complete(sb, es);
2882         }
2883         if (EXT4_SB(sb)->s_journal) {
2884                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2885                         descr = " journalled data mode";
2886                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2887                         descr = " ordered data mode";
2888                 else
2889                         descr = " writeback data mode";
2890         } else
2891                 descr = "out journal";
2892
2893         ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr);
2894
2895         lock_kernel();
2896         return 0;
2897
2898 cantfind_ext4:
2899         if (!silent)
2900                 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
2901         goto failed_mount;
2902
2903 failed_mount4:
2904         ext4_msg(sb, KERN_ERR, "mount failed");
2905         ext4_release_system_zone(sb);
2906         if (sbi->s_journal) {
2907                 jbd2_journal_destroy(sbi->s_journal);
2908                 sbi->s_journal = NULL;
2909         }
2910 failed_mount3:
2911         if (sbi->s_flex_groups) {
2912                 if (is_vmalloc_addr(sbi->s_flex_groups))
2913                         vfree(sbi->s_flex_groups);
2914                 else
2915                         kfree(sbi->s_flex_groups);
2916         }
2917         percpu_counter_destroy(&sbi->s_freeblocks_counter);
2918         percpu_counter_destroy(&sbi->s_freeinodes_counter);
2919         percpu_counter_destroy(&sbi->s_dirs_counter);
2920         percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2921 failed_mount2:
2922         for (i = 0; i < db_count; i++)
2923                 brelse(sbi->s_group_desc[i]);
2924         kfree(sbi->s_group_desc);
2925 failed_mount:
2926         if (sbi->s_proc) {
2927                 remove_proc_entry(sb->s_id, ext4_proc_root);
2928         }
2929 #ifdef CONFIG_QUOTA
2930         for (i = 0; i < MAXQUOTAS; i++)
2931                 kfree(sbi->s_qf_names[i]);
2932 #endif
2933         ext4_blkdev_remove(sbi);
2934         brelse(bh);
2935 out_fail:
2936         sb->s_fs_info = NULL;
2937         kfree(sbi->s_blockgroup_lock);
2938         kfree(sbi);
2939         lock_kernel();
2940         return ret;
2941 }
2942
2943 /*
2944  * Setup any per-fs journal parameters now.  We'll do this both on
2945  * initial mount, once the journal has been initialised but before we've
2946  * done any recovery; and again on any subsequent remount.
2947  */
2948 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2949 {
2950         struct ext4_sb_info *sbi = EXT4_SB(sb);
2951
2952         journal->j_commit_interval = sbi->s_commit_interval;
2953         journal->j_min_batch_time = sbi->s_min_batch_time;
2954         journal->j_max_batch_time = sbi->s_max_batch_time;
2955
2956         spin_lock(&journal->j_state_lock);
2957         if (test_opt(sb, BARRIER))
2958                 journal->j_flags |= JBD2_BARRIER;
2959         else
2960                 journal->j_flags &= ~JBD2_BARRIER;
2961         if (test_opt(sb, DATA_ERR_ABORT))
2962                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2963         else
2964                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2965         spin_unlock(&journal->j_state_lock);
2966 }
2967
2968 static journal_t *ext4_get_journal(struct super_block *sb,
2969                                    unsigned int journal_inum)
2970 {
2971         struct inode *journal_inode;
2972         journal_t *journal;
2973
2974         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2975
2976         /* First, test for the existence of a valid inode on disk.  Bad
2977          * things happen if we iget() an unused inode, as the subsequent
2978          * iput() will try to delete it. */
2979
2980         journal_inode = ext4_iget(sb, journal_inum);
2981         if (IS_ERR(journal_inode)) {
2982                 ext4_msg(sb, KERN_ERR, "no journal found");
2983                 return NULL;
2984         }
2985         if (!journal_inode->i_nlink) {
2986                 make_bad_inode(journal_inode);
2987                 iput(journal_inode);
2988                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
2989                 return NULL;
2990         }
2991
2992         jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2993                   journal_inode, journal_inode->i_size);
2994         if (!S_ISREG(journal_inode->i_mode)) {
2995                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
2996                 iput(journal_inode);
2997                 return NULL;
2998         }
2999
3000         journal = jbd2_journal_init_inode(journal_inode);
3001         if (!journal) {
3002                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
3003                 iput(journal_inode);
3004                 return NULL;
3005         }
3006         journal->j_private = sb;
3007         ext4_init_journal_params(sb, journal);
3008         return journal;
3009 }
3010
3011 static journal_t *ext4_get_dev_journal(struct super_block *sb,
3012                                        dev_t j_dev)
3013 {
3014         struct buffer_head *bh;
3015         journal_t *journal;
3016         ext4_fsblk_t start;
3017         ext4_fsblk_t len;
3018         int hblock, blocksize;
3019         ext4_fsblk_t sb_block;
3020         unsigned long offset;
3021         struct ext4_super_block *es;
3022         struct block_device *bdev;
3023
3024         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3025
3026         bdev = ext4_blkdev_get(j_dev, sb);
3027         if (bdev == NULL)
3028                 return NULL;
3029
3030         if (bd_claim(bdev, sb)) {
3031                 ext4_msg(sb, KERN_ERR,
3032                         "failed to claim external journal device");
3033                 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
3034                 return NULL;
3035         }
3036
3037         blocksize = sb->s_blocksize;
3038         hblock = bdev_hardsect_size(bdev);
3039         if (blocksize < hblock) {
3040                 ext4_msg(sb, KERN_ERR,
3041                         "blocksize too small for journal device");
3042                 goto out_bdev;
3043         }
3044
3045         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
3046         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
3047         set_blocksize(bdev, blocksize);
3048         if (!(bh = __bread(bdev, sb_block, blocksize))) {
3049                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
3050                        "external journal");
3051                 goto out_bdev;
3052         }
3053
3054         es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
3055         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
3056             !(le32_to_cpu(es->s_feature_incompat) &
3057               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
3058                 ext4_msg(sb, KERN_ERR, "external journal has "
3059                                         "bad superblock");
3060                 brelse(bh);
3061                 goto out_bdev;
3062         }
3063
3064         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
3065                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
3066                 brelse(bh);
3067                 goto out_bdev;
3068         }
3069
3070         len = ext4_blocks_count(es);
3071         start = sb_block + 1;
3072         brelse(bh);     /* we're done with the superblock */
3073
3074         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
3075                                         start, len, blocksize);
3076         if (!journal) {
3077                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
3078                 goto out_bdev;
3079         }
3080         journal->j_private = sb;
3081         ll_rw_block(READ, 1, &journal->j_sb_buffer);
3082         wait_on_buffer(journal->j_sb_buffer);
3083         if (!buffer_uptodate(journal->j_sb_buffer)) {
3084                 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
3085                 goto out_journal;
3086         }
3087         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
3088                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
3089                                         "user (unsupported) - %d",
3090                         be32_to_cpu(journal->j_superblock->s_nr_users));
3091                 goto out_journal;
3092         }
3093         EXT4_SB(sb)->journal_bdev = bdev;
3094         ext4_init_journal_params(sb, journal);
3095         return journal;
3096
3097 out_journal:
3098         jbd2_journal_destroy(journal);
3099 out_bdev:
3100         ext4_blkdev_put(bdev);
3101         return NULL;
3102 }
3103
3104 static int ext4_load_journal(struct super_block *sb,
3105                              struct ext4_super_block *es,
3106                              unsigned long journal_devnum)
3107 {
3108         journal_t *journal;
3109         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
3110         dev_t journal_dev;
3111         int err = 0;
3112         int really_read_only;
3113
3114         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3115
3116         if (journal_devnum &&
3117             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3118                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
3119                         "numbers have changed");
3120                 journal_dev = new_decode_dev(journal_devnum);
3121         } else
3122                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
3123
3124         really_read_only = bdev_read_only(sb->s_bdev);
3125
3126         /*
3127          * Are we loading a blank journal or performing recovery after a
3128          * crash?  For recovery, we need to check in advance whether we
3129          * can get read-write access to the device.
3130          */
3131         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3132                 if (sb->s_flags & MS_RDONLY) {
3133                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
3134                                         "required on readonly filesystem");
3135                         if (really_read_only) {
3136                                 ext4_msg(sb, KERN_ERR, "write access "
3137                                         "unavailable, cannot proceed");
3138                                 return -EROFS;
3139                         }
3140                         ext4_msg(sb, KERN_INFO, "write access will "
3141                                "be enabled during recovery");
3142                 }
3143         }
3144
3145         if (journal_inum && journal_dev) {
3146                 ext4_msg(sb, KERN_ERR, "filesystem has both journal "
3147                        "and inode journals!");
3148                 return -EINVAL;
3149         }
3150
3151         if (journal_inum) {
3152                 if (!(journal = ext4_get_journal(sb, journal_inum)))
3153                         return -EINVAL;
3154         } else {
3155                 if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
3156                         return -EINVAL;
3157         }
3158
3159         if (journal->j_flags & JBD2_BARRIER)
3160                 ext4_msg(sb, KERN_INFO, "barriers enabled");
3161         else
3162                 ext4_msg(sb, KERN_INFO, "barriers disabled");
3163
3164         if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
3165                 err = jbd2_journal_update_format(journal);
3166                 if (err)  {
3167                         ext4_msg(sb, KERN_ERR, "error updating journal");
3168                         jbd2_journal_destroy(journal);
3169                         return err;
3170                 }
3171         }
3172
3173         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
3174                 err = jbd2_journal_wipe(journal, !really_read_only);
3175         if (!err)
3176                 err = jbd2_journal_load(journal);
3177
3178         if (err) {
3179                 ext4_msg(sb, KERN_ERR, "error loading journal");
3180                 jbd2_journal_destroy(journal);
3181                 return err;
3182         }
3183
3184         EXT4_SB(sb)->s_journal = journal;
3185         ext4_clear_journal_err(sb, es);
3186
3187         if (journal_devnum &&
3188             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
3189                 es->s_journal_dev = cpu_to_le32(journal_devnum);
3190
3191                 /* Make sure we flush the recovery flag to disk. */
3192                 ext4_commit_super(sb, 1);
3193         }
3194
3195         return 0;
3196 }
3197
3198 static int ext4_commit_super(struct super_block *sb, int sync)
3199 {
3200         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
3201         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
3202         int error = 0;
3203
3204         if (!sbh)
3205                 return error;
3206         if (buffer_write_io_error(sbh)) {
3207                 /*
3208                  * Oh, dear.  A previous attempt to write the
3209                  * superblock failed.  This could happen because the
3210                  * USB device was yanked out.  Or it could happen to
3211                  * be a transient write error and maybe the block will
3212                  * be remapped.  Nothing we can do but to retry the
3213                  * write and hope for the best.
3214                  */
3215                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
3216                        "superblock detected");
3217                 clear_buffer_write_io_error(sbh);
3218                 set_buffer_uptodate(sbh);
3219         }
3220         es->s_wtime = cpu_to_le32(get_seconds());
3221         es->s_kbytes_written =
3222                 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
3223                             ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
3224                               EXT4_SB(sb)->s_sectors_written_start) >> 1));
3225         ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3226                                         &EXT4_SB(sb)->s_freeblocks_counter));
3227         es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
3228                                         &EXT4_SB(sb)->s_freeinodes_counter));
3229         sb->s_dirt = 0;
3230         BUFFER_TRACE(sbh, "marking dirty");
3231         mark_buffer_dirty(sbh);
3232         if (sync) {
3233                 error = sync_dirty_buffer(sbh);
3234                 if (error)
3235                         return error;
3236
3237                 error = buffer_write_io_error(sbh);
3238                 if (error) {
3239                         ext4_msg(sb, KERN_ERR, "I/O error while writing "
3240                                "superblock");
3241                         clear_buffer_write_io_error(sbh);
3242                         set_buffer_uptodate(sbh);
3243                 }
3244         }
3245         return error;
3246 }
3247
3248 /*
3249  * Have we just finished recovery?  If so, and if we are mounting (or
3250  * remounting) the filesystem readonly, then we will end up with a
3251  * consistent fs on disk.  Record that fact.
3252  */
3253 static void ext4_mark_recovery_complete(struct super_block *sb,
3254                                         struct ext4_super_block *es)
3255 {
3256         journal_t *journal = EXT4_SB(sb)->s_journal;
3257
3258         if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3259                 BUG_ON(journal != NULL);
3260                 return;
3261         }
3262         jbd2_journal_lock_updates(journal);
3263         if (jbd2_journal_flush(journal) < 0)
3264                 goto out;
3265
3266         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
3267             sb->s_flags & MS_RDONLY) {
3268                 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3269                 ext4_commit_super(sb, 1);
3270         }
3271
3272 out:
3273         jbd2_journal_unlock_updates(journal);
3274 }
3275
3276 /*
3277  * If we are mounting (or read-write remounting) a filesystem whose journal
3278  * has recorded an error from a previous lifetime, move that error to the
3279  * main filesystem now.
3280  */
3281 static void ext4_clear_journal_err(struct super_block *sb,
3282                                    struct ext4_super_block *es)
3283 {
3284         journal_t *journal;
3285         int j_errno;
3286         const char *errstr;
3287
3288         BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
3289
3290         journal = EXT4_SB(sb)->s_journal;
3291
3292         /*
3293          * Now check for any error status which may have been recorded in the
3294          * journal by a prior ext4_error() or ext4_abort()
3295          */
3296
3297         j_errno = jbd2_journal_errno(journal);
3298         if (j_errno) {
3299                 char nbuf[16];
3300
3301                 errstr = ext4_decode_error(sb, j_errno, nbuf);
3302                 ext4_warning(sb, __func__, "Filesystem error recorded "
3303                              "from previous mount: %s", errstr);
3304                 ext4_warning(sb, __func__, "Marking fs in need of "
3305                              "filesystem check.");
3306
3307                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3308                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3309                 ext4_commit_super(sb, 1);
3310
3311                 jbd2_journal_clear_err(journal);
3312         }
3313 }
3314
3315 /*
3316  * Force the running and committing transactions to commit,
3317  * and wait on the commit.
3318  */
3319 int ext4_force_commit(struct super_block *sb)
3320 {
3321         journal_t *journal;
3322         int ret = 0;
3323
3324         if (sb->s_flags & MS_RDONLY)
3325                 return 0;
3326
3327         journal = EXT4_SB(sb)->s_journal;
3328         if (journal)
3329                 ret = ext4_journal_force_commit(journal);
3330
3331         return ret;
3332 }
3333
/*
 * Write the superblock of @sb out synchronously.  The result of the
 * commit is not reported to the caller.
 */
static void ext4_write_super(struct super_block *sb)
{
        const int sync = 1;

        ext4_commit_super(sb, sync);
}
3338
3339 static int ext4_sync_fs(struct super_block *sb, int wait)
3340 {
3341         int ret = 0;
3342         tid_t target;
3343
3344         trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3345         if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
3346                 if (wait)
3347                         jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
3348         }
3349         return ret;
3350 }
3351
3352 /*
3353  * LVM calls this function before a (read-only) snapshot is created.  This
3354  * gives us a chance to flush the journal completely and mark the fs clean.
3355  */
3356 static int ext4_freeze(struct super_block *sb)
3357 {
3358         int error = 0;
3359         journal_t *journal;
3360
3361         if (sb->s_flags & MS_RDONLY)
3362                 return 0;
3363
3364         journal = EXT4_SB(sb)->s_journal;
3365
3366         /* Now we set up the journal barrier. */
3367         jbd2_journal_lock_updates(journal);
3368
3369         /*
3370          * Don't clear the needs_recovery flag if we failed to flush
3371          * the journal.
3372          */
3373         error = jbd2_journal_flush(journal);
3374         if (error < 0) {
3375         out:
3376                 jbd2_journal_unlock_updates(journal);
3377                 return error;
3378         }
3379
3380         /* Journal blocked and flushed, clear needs_recovery flag. */
3381         EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3382         error = ext4_commit_super(sb, 1);
3383         if (error)
3384                 goto out;
3385         return 0;
3386 }
3387
3388 /*
3389  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3390  * flag here, even though the filesystem is not technically dirty yet.
3391  */
3392 static int ext4_unfreeze(struct super_block *sb)
3393 {
3394         if (sb->s_flags & MS_RDONLY)
3395                 return 0;
3396
3397         lock_super(sb);
3398         /* Reset the needs_recovery flag before the fs is unlocked. */
3399         EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3400         ext4_commit_super(sb, 1);
3401         unlock_super(sb);
3402         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3403         return 0;
3404 }
3405
3406 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3407 {
3408         struct ext4_super_block *es;
3409         struct ext4_sb_info *sbi = EXT4_SB(sb);
3410         ext4_fsblk_t n_blocks_count = 0;
3411         unsigned long old_sb_flags;
3412         struct ext4_mount_options old_opts;
3413         ext4_group_t g;
3414         unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3415         int err;
3416 #ifdef CONFIG_QUOTA
3417         int i;
3418 #endif
3419
3420         /* Store the original options */
3421         old_sb_flags = sb->s_flags;
3422         old_opts.s_mount_opt = sbi->s_mount_opt;
3423         old_opts.s_resuid = sbi->s_resuid;
3424         old_opts.s_resgid = sbi->s_resgid;
3425         old_opts.s_commit_interval = sbi->s_commit_interval;
3426         old_opts.s_min_batch_time = sbi->s_min_batch_time;
3427         old_opts.s_max_batch_time = sbi->s_max_batch_time;
3428 #ifdef CONFIG_QUOTA
3429         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3430         for (i = 0; i < MAXQUOTAS; i++)
3431                 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3432 #endif
3433         if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3434                 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3435
3436         /*
3437          * Allow the "check" option to be passed as a remount option.
3438          */
3439         if (!parse_options(data, sb, NULL, &journal_ioprio,
3440                            &n_blocks_count, 1)) {
3441                 err = -EINVAL;
3442                 goto restore_opts;
3443         }
3444
3445         if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3446                 ext4_abort(sb, __func__, "Abort forced by user");
3447
3448         sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3449                 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3450
3451         es = sbi->s_es;
3452
3453         if (sbi->s_journal) {
3454                 ext4_init_journal_params(sb, sbi->s_journal);
3455                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3456         }
3457
3458         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3459                 n_blocks_count > ext4_blocks_count(es)) {
3460                 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3461                         err = -EROFS;
3462                         goto restore_opts;
3463                 }
3464
3465                 if (*flags & MS_RDONLY) {
3466                         /*
3467                          * First of all, the unconditional stuff we have to do
3468                          * to disable replay of the journal when we next remount
3469                          */
3470                         sb->s_flags |= MS_RDONLY;
3471
3472                         /*
3473                          * OK, test if we are remounting a valid rw partition
3474                          * readonly, and if so set the rdonly flag and then
3475                          * mark the partition as valid again.
3476                          */
3477                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3478                             (sbi->s_mount_state & EXT4_VALID_FS))
3479                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
3480
3481                         if (sbi->s_journal)
3482                                 ext4_mark_recovery_complete(sb, es);
3483                 } else {
3484                         int ret;
3485                         if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3486                                         ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3487                                 ext4_msg(sb, KERN_WARNING, "couldn't "
3488                                        "remount RDWR because of unsupported "
3489                                        "optional features (%x)",
3490                                 (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3491                                         ~EXT4_FEATURE_RO_COMPAT_SUPP));
3492                                 err = -EROFS;
3493                                 goto restore_opts;
3494                         }
3495
3496                         /*
3497                          * Make sure the group descriptor checksums
3498                          * are sane.  If they aren't, refuse to remount r/w.
3499                          */
3500                         for (g = 0; g < sbi->s_groups_count; g++) {
3501                                 struct ext4_group_desc *gdp =
3502                                         ext4_get_group_desc(sb, g, NULL);
3503
3504                                 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3505                                         ext4_msg(sb, KERN_ERR,
3506                "ext4_remount: Checksum for group %u failed (%u!=%u)",
3507                 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3508                                                le16_to_cpu(gdp->bg_checksum));
3509                                         err = -EINVAL;
3510                                         goto restore_opts;
3511                                 }
3512                         }
3513
3514                         /*
3515                          * If we have an unprocessed orphan list hanging
3516                          * around from a previously readonly bdev mount,
3517                          * require a full umount/remount for now.
3518                          */
3519                         if (es->s_last_orphan) {
3520                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
3521                                        "remount RDWR because of unprocessed "
3522                                        "orphan inode list.  Please "
3523                                        "umount/remount instead");
3524                                 err = -EINVAL;
3525                                 goto restore_opts;
3526                         }
3527
3528                         /*
3529                          * Mounting a RDONLY partition read-write, so reread
3530                          * and store the current valid flag.  (It may have
3531                          * been changed by e2fsck since we originally mounted
3532                          * the partition.)
3533                          */
3534                         if (sbi->s_journal)
3535                                 ext4_clear_journal_err(sb, es);
3536                         sbi->s_mount_state = le16_to_cpu(es->s_state);
3537                         if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3538                                 goto restore_opts;
3539                         if (!ext4_setup_super(sb, es, 0))
3540                                 sb->s_flags &= ~MS_RDONLY;
3541                 }
3542         }
3543         ext4_setup_system_zone(sb);
3544         if (sbi->s_journal == NULL)
3545                 ext4_commit_super(sb, 1);
3546
3547 #ifdef CONFIG_QUOTA
3548         /* Release old quota file names */
3549         for (i = 0; i < MAXQUOTAS; i++)
3550                 if (old_opts.s_qf_names[i] &&
3551                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3552                         kfree(old_opts.s_qf_names[i]);
3553 #endif
3554         return 0;
3555
3556 restore_opts:
3557         sb->s_flags = old_sb_flags;
3558         sbi->s_mount_opt = old_opts.s_mount_opt;
3559         sbi->s_resuid = old_opts.s_resuid;
3560         sbi->s_resgid = old_opts.s_resgid;
3561         sbi->s_commit_interval = old_opts.s_commit_interval;
3562         sbi->s_min_batch_time = old_opts.s_min_batch_time;
3563         sbi->s_max_batch_time = old_opts.s_max_batch_time;
3564 #ifdef CONFIG_QUOTA
3565         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3566         for (i = 0; i < MAXQUOTAS; i++) {
3567                 if (sbi->s_qf_names[i] &&
3568                     old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3569                         kfree(sbi->s_qf_names[i]);
3570                 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3571         }
3572 #endif
3573         return err;
3574 }
3575
3576 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3577 {
3578         struct super_block *sb = dentry->d_sb;
3579         struct ext4_sb_info *sbi = EXT4_SB(sb);
3580         struct ext4_super_block *es = sbi->s_es;
3581         u64 fsid;
3582
3583         if (test_opt(sb, MINIX_DF)) {
3584                 sbi->s_overhead_last = 0;
3585         } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3586                 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3587                 ext4_fsblk_t overhead = 0;
3588
3589                 /*
3590                  * Compute the overhead (FS structures).  This is constant
3591                  * for a given filesystem unless the number of block groups
3592                  * changes so we cache the previous value until it does.
3593                  */
3594
3595                 /*
3596                  * All of the blocks before first_data_block are
3597                  * overhead
3598                  */
3599                 overhead = le32_to_cpu(es->s_first_data_block);
3600
3601                 /*
3602                  * Add the overhead attributed to the superblock and
3603                  * block group descriptors.  If the sparse superblocks
3604                  * feature is turned on, then not all groups have this.
3605                  */
3606                 for (i = 0; i < ngroups; i++) {
3607                         overhead += ext4_bg_has_super(sb, i) +
3608                                 ext4_bg_num_gdb(sb, i);
3609                         cond_resched();
3610                 }
3611
3612                 /*
3613                  * Every block group has an inode bitmap, a block
3614                  * bitmap, and an inode table.
3615                  */
3616                 overhead += ngroups * (2 + sbi->s_itb_per_group);
3617                 sbi->s_overhead_last = overhead;
3618                 smp_wmb();
3619                 sbi->s_blocks_last = ext4_blocks_count(es);
3620         }
3621
3622         buf->f_type = EXT4_SUPER_MAGIC;
3623         buf->f_bsize = sb->s_blocksize;
3624         buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3625         buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3626                        percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3627         ext4_free_blocks_count_set(es, buf->f_bfree);
3628         buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3629         if (buf->f_bfree < ext4_r_blocks_count(es))
3630                 buf->f_bavail = 0;
3631         buf->f_files = le32_to_cpu(es->s_inodes_count);
3632         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3633         es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3634         buf->f_namelen = EXT4_NAME_LEN;
3635         fsid = le64_to_cpup((void *)es->s_uuid) ^
3636                le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3637         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3638         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3639
3640         return 0;
3641 }
3642
3643 /* Helper function for writing quotas on sync - we need to start transaction
3644  * before quota file is locked for write. Otherwise there are possible deadlocks:
3645  * Process 1                         Process 2
3646  * ext4_create()                     quota_sync()
3647  *   jbd2_journal_start()                  write_dquot()
3648  *   vfs_dq_init()                         down(dqio_mutex)
3649  *     down(dqio_mutex)                    jbd2_journal_start()
3650  *
3651  */
3652
3653 #ifdef CONFIG_QUOTA
3654
3655 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3656 {
3657         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3658 }
3659
3660 static int ext4_write_dquot(struct dquot *dquot)
3661 {
3662         int ret, err;
3663         handle_t *handle;
3664         struct inode *inode;
3665
3666         inode = dquot_to_inode(dquot);
3667         handle = ext4_journal_start(inode,
3668                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3669         if (IS_ERR(handle))
3670                 return PTR_ERR(handle);
3671         ret = dquot_commit(dquot);
3672         err = ext4_journal_stop(handle);
3673         if (!ret)
3674                 ret = err;
3675         return ret;
3676 }
3677
3678 static int ext4_acquire_dquot(struct dquot *dquot)
3679 {
3680         int ret, err;
3681         handle_t *handle;
3682
3683         handle = ext4_journal_start(dquot_to_inode(dquot),
3684                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3685         if (IS_ERR(handle))
3686                 return PTR_ERR(handle);
3687         ret = dquot_acquire(dquot);
3688         err = ext4_journal_stop(handle);
3689         if (!ret)
3690                 ret = err;
3691         return ret;
3692 }
3693
3694 static int ext4_release_dquot(struct dquot *dquot)
3695 {
3696         int ret, err;
3697         handle_t *handle;
3698
3699         handle = ext4_journal_start(dquot_to_inode(dquot),
3700                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3701         if (IS_ERR(handle)) {
3702                 /* Release dquot anyway to avoid endless cycle in dqput() */
3703                 dquot_release(dquot);
3704                 return PTR_ERR(handle);
3705         }
3706         ret = dquot_release(dquot);
3707         err = ext4_journal_stop(handle);
3708         if (!ret)
3709                 ret = err;
3710         return ret;
3711 }
3712
3713 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3714 {
3715         /* Are we journaling quotas? */
3716         if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3717             EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3718                 dquot_mark_dquot_dirty(dquot);
3719                 return ext4_write_dquot(dquot);
3720         } else {
3721                 return dquot_mark_dquot_dirty(dquot);
3722         }
3723 }
3724
3725 static int ext4_write_info(struct super_block *sb, int type)
3726 {
3727         int ret, err;
3728         handle_t *handle;
3729
3730         /* Data block + inode block */
3731         handle = ext4_journal_start(sb->s_root->d_inode, 2);
3732         if (IS_ERR(handle))
3733                 return PTR_ERR(handle);
3734         ret = dquot_commit_info(sb, type);
3735         err = ext4_journal_stop(handle);
3736         if (!ret)
3737                 ret = err;
3738         return ret;
3739 }
3740
3741 /*
3742  * Turn on quotas during mount time - we need to find
3743  * the quota file and such...
3744  */
3745 static int ext4_quota_on_mount(struct super_block *sb, int type)
3746 {
3747         return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3748                                   EXT4_SB(sb)->s_jquota_fmt, type);
3749 }
3750
3751 /*
3752  * Standard function to be called on quota_on
3753  */
3754 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3755                          char *name, int remount)
3756 {
3757         int err;
3758         struct path path;
3759
3760         if (!test_opt(sb, QUOTA))
3761                 return -EINVAL;
3762         /* When remounting, no checks are needed and in fact, name is NULL */
3763         if (remount)
3764                 return vfs_quota_on(sb, type, format_id, name, remount);
3765
3766         err = kern_path(name, LOOKUP_FOLLOW, &path);
3767         if (err)
3768                 return err;
3769
3770         /* Quotafile not on the same filesystem? */
3771         if (path.mnt->mnt_sb != sb) {
3772                 path_put(&path);
3773                 return -EXDEV;
3774         }
3775         /* Journaling quota? */
3776         if (EXT4_SB(sb)->s_qf_names[type]) {
3777                 /* Quotafile not in fs root? */
3778                 if (path.dentry->d_parent != sb->s_root)
3779                         ext4_msg(sb, KERN_WARNING,
3780                                 "Quota file not on filesystem root. "
3781                                 "Journaled quota will not work");
3782         }
3783
3784         /*
3785          * When we journal data on quota file, we have to flush journal to see
3786          * all updates to the file when we bypass pagecache...
3787          */
3788         if (EXT4_SB(sb)->s_journal &&
3789             ext4_should_journal_data(path.dentry->d_inode)) {
3790                 /*
3791                  * We don't need to lock updates but journal_flush() could
3792                  * otherwise be livelocked...
3793                  */
3794                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3795                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3796                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3797                 if (err) {
3798                         path_put(&path);
3799                         return err;
3800                 }
3801         }
3802
3803         err = vfs_quota_on_path(sb, type, format_id, &path);
3804         path_put(&path);
3805         return err;
3806 }
3807
3808 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3809  * acquiring the locks... As quota files are never truncated and quota code
3810  * itself serializes the operations (and noone else should touch the files)
3811  * we don't have to be afraid of races */
3812 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3813                                size_t len, loff_t off)
3814 {
3815         struct inode *inode = sb_dqopt(sb)->files[type];
3816         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3817         int err = 0;
3818         int offset = off & (sb->s_blocksize - 1);
3819         int tocopy;
3820         size_t toread;
3821         struct buffer_head *bh;
3822         loff_t i_size = i_size_read(inode);
3823
3824         if (off > i_size)
3825                 return 0;
3826         if (off+len > i_size)
3827                 len = i_size-off;
3828         toread = len;
3829         while (toread > 0) {
3830                 tocopy = sb->s_blocksize - offset < toread ?
3831                                 sb->s_blocksize - offset : toread;
3832                 bh = ext4_bread(NULL, inode, blk, 0, &err);
3833                 if (err)
3834                         return err;
3835                 if (!bh)        /* A hole? */
3836                         memset(data, 0, tocopy);
3837                 else
3838                         memcpy(data, bh->b_data+offset, tocopy);
3839                 brelse(bh);
3840                 offset = 0;
3841                 toread -= tocopy;
3842                 data += tocopy;
3843                 blk++;
3844         }
3845         return len;
3846 }
3847
3848 /* Write to quotafile (we know the transaction is already started and has
3849  * enough credits) */
3850 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3851                                 const char *data, size_t len, loff_t off)
3852 {
3853         struct inode *inode = sb_dqopt(sb)->files[type];
3854         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3855         int err = 0;
3856         int offset = off & (sb->s_blocksize - 1);
3857         int tocopy;
3858         int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3859         size_t towrite = len;
3860         struct buffer_head *bh;
3861         handle_t *handle = journal_current_handle();
3862
3863         if (EXT4_SB(sb)->s_journal && !handle) {
3864                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3865                         " cancelled because transaction is not started",
3866                         (unsigned long long)off, (unsigned long long)len);
3867                 return -EIO;
3868         }
3869         mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3870         while (towrite > 0) {
3871                 tocopy = sb->s_blocksize - offset < towrite ?
3872                                 sb->s_blocksize - offset : towrite;
3873                 bh = ext4_bread(handle, inode, blk, 1, &err);
3874                 if (!bh)
3875                         goto out;
3876                 if (journal_quota) {
3877                         err = ext4_journal_get_write_access(handle, bh);
3878                         if (err) {
3879                                 brelse(bh);
3880                                 goto out;
3881                         }
3882                 }
3883                 lock_buffer(bh);
3884                 memcpy(bh->b_data+offset, data, tocopy);
3885                 flush_dcache_page(bh->b_page);
3886                 unlock_buffer(bh);
3887                 if (journal_quota)
3888                         err = ext4_handle_dirty_metadata(handle, NULL, bh);
3889                 else {
3890                         /* Always do at least ordered writes for quotas */
3891                         err = ext4_jbd2_file_inode(handle, inode);
3892                         mark_buffer_dirty(bh);
3893                 }
3894                 brelse(bh);
3895                 if (err)
3896                         goto out;
3897                 offset = 0;
3898                 towrite -= tocopy;
3899                 data += tocopy;
3900                 blk++;
3901         }
3902 out:
3903         if (len == towrite) {
3904                 mutex_unlock(&inode->i_mutex);
3905                 return err;
3906         }
3907         if (inode->i_size < off+len-towrite) {
3908                 i_size_write(inode, off+len-towrite);
3909                 EXT4_I(inode)->i_disksize = inode->i_size;
3910         }
3911         inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3912         ext4_mark_inode_dirty(handle, inode);
3913         mutex_unlock(&inode->i_mutex);
3914         return len - towrite;
3915 }
3916
3917 #endif
3918
3919 static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3920                        const char *dev_name, void *data, struct vfsmount *mnt)
3921 {
3922         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3923 }
3924
3925 static struct file_system_type ext4_fs_type = {
3926         .owner          = THIS_MODULE,
3927         .name           = "ext4",
3928         .get_sb         = ext4_get_sb,
3929         .kill_sb        = kill_block_super,
3930         .fs_flags       = FS_REQUIRES_DEV,
3931 };
3932
3933 #ifdef CONFIG_EXT4DEV_COMPAT
3934 static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
3935                           const char *dev_name, void *data,struct vfsmount *mnt)
3936 {
3937         printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs "
3938                "to mount using ext4\n", dev_name);
3939         printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility "
3940                "will go away by 2.6.31\n", dev_name);
3941         return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3942 }
3943
3944 static struct file_system_type ext4dev_fs_type = {
3945         .owner          = THIS_MODULE,
3946         .name           = "ext4dev",
3947         .get_sb         = ext4dev_get_sb,
3948         .kill_sb        = kill_block_super,
3949         .fs_flags       = FS_REQUIRES_DEV,
3950 };
3951 MODULE_ALIAS("ext4dev");
3952 #endif
3953
3954 static int __init init_ext4_fs(void)
3955 {
3956         int err;
3957
3958         err = init_ext4_system_zone();
3959         if (err)
3960                 return err;
3961         ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
3962         if (!ext4_kset)
3963                 goto out4;
3964         ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3965         err = init_ext4_mballoc();
3966         if (err)
3967                 goto out3;
3968
3969         err = init_ext4_xattr();
3970         if (err)
3971                 goto out2;
3972         err = init_inodecache();
3973         if (err)
3974                 goto out1;
3975         err = register_filesystem(&ext4_fs_type);
3976         if (err)
3977                 goto out;
3978 #ifdef CONFIG_EXT4DEV_COMPAT
3979         err = register_filesystem(&ext4dev_fs_type);
3980         if (err) {
3981                 unregister_filesystem(&ext4_fs_type);
3982                 goto out;
3983         }
3984 #endif
3985         return 0;
3986 out:
3987         destroy_inodecache();
3988 out1:
3989         exit_ext4_xattr();
3990 out2:
3991         exit_ext4_mballoc();
3992 out3:
3993         remove_proc_entry("fs/ext4", NULL);
3994         kset_unregister(ext4_kset);
3995 out4:
3996         exit_ext4_system_zone();
3997         return err;
3998 }
3999
4000 static void __exit exit_ext4_fs(void)
4001 {
4002         unregister_filesystem(&ext4_fs_type);
4003 #ifdef CONFIG_EXT4DEV_COMPAT
4004         unregister_filesystem(&ext4dev_fs_type);
4005 #endif
4006         destroy_inodecache();
4007         exit_ext4_xattr();
4008         exit_ext4_mballoc();
4009         remove_proc_entry("fs/ext4", NULL);
4010         kset_unregister(ext4_kset);
4011         exit_ext4_system_zone();
4012 }
4013
4014 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
4015 MODULE_DESCRIPTION("Fourth Extended Filesystem");
4016 MODULE_LICENSE("GPL");
4017 module_init(init_ext4_fs)
4018 module_exit(exit_ext4_fs)