2  * Copyright (C) 2007 Oracle.  All rights reserved.
 
   4  * This program is free software; you can redistribute it and/or
 
   5  * modify it under the terms of the GNU General Public
 
   6  * License v2 as published by the Free Software Foundation.
 
   8  * This program is distributed in the hope that it will be useful,
 
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 
  11  * General Public License for more details.
 
  13  * You should have received a copy of the GNU General Public
 
  14  * License along with this program; if not, write to the
 
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 
  16  * Boston, MA 021110-1307, USA.
 
  18 #include <linux/sched.h>
 
  19 #include <linux/pagemap.h>
 
  20 #include <linux/writeback.h>
 
  21 #include <linux/blkdev.h>
 
  26 #include "print-tree.h"
 
  27 #include "transaction.h"
 
  30 #include "ref-cache.h"
 
  32 #define PENDING_EXTENT_INSERT 0
 
  33 #define PENDING_EXTENT_DELETE 1
 
  34 #define PENDING_BACKREF_UPDATE 2
 
  36 struct pending_extent_op {
 
  47 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
 
  48                                  btrfs_root *extent_root, int all);
 
  49 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
 
  50                                btrfs_root *extent_root, int all);
 
  51 static struct btrfs_block_group_cache *
 
  52 __btrfs_find_block_group(struct btrfs_root *root,
 
  53                          struct btrfs_block_group_cache *hint,
 
  54                          u64 search_start, int data, int owner);
 
  56 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 
  58         return (cache->flags & bits) == bits;
 
  62  * this adds the block group to the fs_info rb tree for the block group
 
  65 int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
 
  66                                 struct btrfs_block_group_cache *block_group)
 
  69         struct rb_node *parent = NULL;
 
  70         struct btrfs_block_group_cache *cache;
 
  72         spin_lock(&info->block_group_cache_lock);
 
  73         p = &info->block_group_cache_tree.rb_node;
 
  77                 cache = rb_entry(parent, struct btrfs_block_group_cache,
 
  79                 if (block_group->key.objectid < cache->key.objectid) {
 
  81                 } else if (block_group->key.objectid > cache->key.objectid) {
 
  84                         spin_unlock(&info->block_group_cache_lock);
 
  89         rb_link_node(&block_group->cache_node, parent, p);
 
  90         rb_insert_color(&block_group->cache_node,
 
  91                         &info->block_group_cache_tree);
 
  92         spin_unlock(&info->block_group_cache_lock);
 
  98  * This will return the block group at or after bytenr if contains is 0, else
 
  99  * it will return the block group that contains the bytenr
 
 101 static struct btrfs_block_group_cache *
 
 102 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 
 105         struct btrfs_block_group_cache *cache, *ret = NULL;
 
 109         spin_lock(&info->block_group_cache_lock);
 
 110         n = info->block_group_cache_tree.rb_node;
 
 113                 cache = rb_entry(n, struct btrfs_block_group_cache,
 
 115                 end = cache->key.objectid + cache->key.offset - 1;
 
 116                 start = cache->key.objectid;
 
 118                 if (bytenr < start) {
 
 119                         if (!contains && (!ret || start < ret->key.objectid))
 
 122                 } else if (bytenr > start) {
 
 123                         if (contains && bytenr <= end) {
 
 133         spin_unlock(&info->block_group_cache_lock);
 
 139  * this is only called by cache_block_group, since we could have freed extents
 
 140  * we need to check the pinned_extents for any extents that can't be used yet
 
 141  * since their free space will be released as soon as the transaction commits.
 
 143 static int add_new_free_space(struct btrfs_block_group_cache *block_group,
 
 144                               struct btrfs_fs_info *info, u64 start, u64 end)
 
 146         u64 extent_start, extent_end, size;
 
 149         mutex_lock(&info->pinned_mutex);
 
 150         while (start < end) {
 
 151                 ret = find_first_extent_bit(&info->pinned_extents, start,
 
 152                                             &extent_start, &extent_end,
 
 157                 if (extent_start == start) {
 
 158                         start = extent_end + 1;
 
 159                 } else if (extent_start > start && extent_start < end) {
 
 160                         size = extent_start - start;
 
 161                         ret = btrfs_add_free_space_lock(block_group, start,
 
 164                         start = extent_end + 1;
 
 172                 ret = btrfs_add_free_space_lock(block_group, start, size);
 
 175         mutex_unlock(&info->pinned_mutex);
 
 180 static int cache_block_group(struct btrfs_root *root,
 
 181                              struct btrfs_block_group_cache *block_group)
 
 183         struct btrfs_path *path;
 
 185         struct btrfs_key key;
 
 186         struct extent_buffer *leaf;
 
 195         root = root->fs_info->extent_root;
 
 197         if (block_group->cached)
 
 200         path = btrfs_alloc_path();
 
 206          * we get into deadlocks with paths held by callers of this function.
 
 207          * since the alloc_mutex is protecting things right now, just
 
 208          * skip the locking here
 
 210         path->skip_locking = 1;
 
 211         first_free = max_t(u64, block_group->key.objectid,
 
 212                            BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
 
 213         key.objectid = block_group->key.objectid;
 
 215         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 
 216         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 
 219         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
 
 223                 leaf = path->nodes[0];
 
 224                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
 225                 if (key.objectid + key.offset > first_free)
 
 226                         first_free = key.objectid + key.offset;
 
 229                 leaf = path->nodes[0];
 
 230                 slot = path->slots[0];
 
 231                 if (slot >= btrfs_header_nritems(leaf)) {
 
 232                         ret = btrfs_next_leaf(root, path);
 
 240                 btrfs_item_key_to_cpu(leaf, &key, slot);
 
 241                 if (key.objectid < block_group->key.objectid)
 
 244                 if (key.objectid >= block_group->key.objectid +
 
 245                     block_group->key.offset)
 
 248                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
 
 254                         add_new_free_space(block_group, root->fs_info, last,
 
 257                         last = key.objectid + key.offset;
 
 266         add_new_free_space(block_group, root->fs_info, last,
 
 267                            block_group->key.objectid +
 
 268                            block_group->key.offset);
 
 270         block_group->cached = 1;
 
 273         btrfs_free_path(path);
 
 278  * return the block group that starts at or after bytenr
 
 280 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
 
 284         struct btrfs_block_group_cache *cache;
 
 286         cache = block_group_cache_tree_search(info, bytenr, 0);
 
 292  * return the block group that contains teh given bytenr
 
 294 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
 
 298         struct btrfs_block_group_cache *cache;
 
 300         cache = block_group_cache_tree_search(info, bytenr, 1);
 
 305 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 308         struct list_head *head = &info->space_info;
 
 309         struct list_head *cur;
 
 310         struct btrfs_space_info *found;
 
 311         list_for_each(cur, head) {
 
 312                 found = list_entry(cur, struct btrfs_space_info, list);
 
 313                 if (found->flags == flags)
 
 319 static u64 div_factor(u64 num, int factor)
 
 328 static struct btrfs_block_group_cache *
 
 329 __btrfs_find_block_group(struct btrfs_root *root,
 
 330                          struct btrfs_block_group_cache *hint,
 
 331                          u64 search_start, int data, int owner)
 
 333         struct btrfs_block_group_cache *cache;
 
 334         struct btrfs_block_group_cache *found_group = NULL;
 
 335         struct btrfs_fs_info *info = root->fs_info;
 
 343         if (data & BTRFS_BLOCK_GROUP_METADATA)
 
 347                 struct btrfs_block_group_cache *shint;
 
 348                 shint = btrfs_lookup_first_block_group(info, search_start);
 
 349                 if (shint && block_group_bits(shint, data) && !shint->ro) {
 
 350                         spin_lock(&shint->lock);
 
 351                         used = btrfs_block_group_used(&shint->item);
 
 352                         if (used + shint->pinned + shint->reserved <
 
 353                             div_factor(shint->key.offset, factor)) {
 
 354                                 spin_unlock(&shint->lock);
 
 357                         spin_unlock(&shint->lock);
 
 360         if (hint && !hint->ro && block_group_bits(hint, data)) {
 
 361                 spin_lock(&hint->lock);
 
 362                 used = btrfs_block_group_used(&hint->item);
 
 363                 if (used + hint->pinned + hint->reserved <
 
 364                     div_factor(hint->key.offset, factor)) {
 
 365                         spin_unlock(&hint->lock);
 
 368                 spin_unlock(&hint->lock);
 
 369                 last = hint->key.objectid + hint->key.offset;
 
 372                         last = max(hint->key.objectid, search_start);
 
 378                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
 
 382                 spin_lock(&cache->lock);
 
 383                 last = cache->key.objectid + cache->key.offset;
 
 384                 used = btrfs_block_group_used(&cache->item);
 
 386                 if (!cache->ro && block_group_bits(cache, data)) {
 
 387                         free_check = div_factor(cache->key.offset, factor);
 
 388                         if (used + cache->pinned + cache->reserved <
 
 391                                 spin_unlock(&cache->lock);
 
 395                 spin_unlock(&cache->lock);
 
 403         if (!full_search && factor < 10) {
 
 413 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
 
 414                                                  struct btrfs_block_group_cache
 
 415                                                  *hint, u64 search_start,
 
 419         struct btrfs_block_group_cache *ret;
 
 420         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
 
 424 /* simple helper to search for an existing extent at a given offset */
 
 425 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 
 428         struct btrfs_key key;
 
 429         struct btrfs_path *path;
 
 431         path = btrfs_alloc_path();
 
 433         key.objectid = start;
 
 435         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 
 436         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
 
 438         btrfs_free_path(path);
 
 443  * Back reference rules.  Back refs have three main goals:
 
 445  * 1) differentiate between all holders of references to an extent so that
 
 446  *    when a reference is dropped we can make sure it was a valid reference
 
 447  *    before freeing the extent.
 
 449  * 2) Provide enough information to quickly find the holders of an extent
 
 450  *    if we notice a given block is corrupted or bad.
 
 452  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 
 453  *    maintenance.  This is actually the same as #2, but with a slightly
 
 454  *    different use case.
 
 456  * File extents can be referenced by:
 
 458  * - multiple snapshots, subvolumes, or different generations in one subvol
 
 459  * - different files inside a single subvolume
 
 460  * - different offsets inside a file (bookend extents in file.c)
 
 462  * The extent ref structure has fields for:
 
 464  * - Objectid of the subvolume root
 
 465  * - Generation number of the tree holding the reference
 
 466  * - objectid of the file holding the reference
 
 467  * - number of references holding by parent node (alway 1 for tree blocks)
 
 469  * Btree leaf may hold multiple references to a file extent. In most cases,
 
 470  * these references are from same file and the corresponding offsets inside
 
 471  * the file are close together.
 
 473  * When a file extent is allocated the fields are filled in:
 
 474  *     (root_key.objectid, trans->transid, inode objectid, 1)
 
 476  * When a leaf is cow'd new references are added for every file extent found
 
 477  * in the leaf.  It looks similar to the create case, but trans->transid will
 
 478  * be different when the block is cow'd.
 
 480  *     (root_key.objectid, trans->transid, inode objectid,
 
 481  *      number of references in the leaf)
 
 483  * When a file extent is removed either during snapshot deletion or
 
 484  * file truncation, we find the corresponding back reference and check
 
 485  * the following fields:
 
 487  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 
 490  * Btree extents can be referenced by:
 
 492  * - Different subvolumes
 
 493  * - Different generations of the same subvolume
 
 495  * When a tree block is created, back references are inserted:
 
 497  * (root->root_key.objectid, trans->transid, level, 1)
 
 499  * When a tree block is cow'd, new back references are added for all the
 
 500  * blocks it points to. If the tree block isn't in reference counted root,
 
 501  * the old back references are removed. These new back references are of
 
 502  * the form (trans->transid will have increased since creation):
 
 504  * (root->root_key.objectid, trans->transid, level, 1)
 
 506  * When a backref is in deleting, the following fields are checked:
 
 508  * if backref was for a tree root:
 
 509  *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
 
 511  *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
 
 513  * Back Reference Key composing:
 
 515  * The key objectid corresponds to the first byte in the extent, the key
 
 516  * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 
 517  * byte of parent extent. If a extent is tree root, the key offset is set
 
 518  * to the key objectid.
 
 521 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
 
 522                                           struct btrfs_root *root,
 
 523                                           struct btrfs_path *path,
 
 524                                           u64 bytenr, u64 parent,
 
 525                                           u64 ref_root, u64 ref_generation,
 
 526                                           u64 owner_objectid, int del)
 
 528         struct btrfs_key key;
 
 529         struct btrfs_extent_ref *ref;
 
 530         struct extent_buffer *leaf;
 
 534         key.objectid = bytenr;
 
 535         key.type = BTRFS_EXTENT_REF_KEY;
 
 538         ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
 
 546         leaf = path->nodes[0];
 
 547         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
 
 548         ref_objectid = btrfs_ref_objectid(leaf, ref);
 
 549         if (btrfs_ref_root(leaf, ref) != ref_root ||
 
 550             btrfs_ref_generation(leaf, ref) != ref_generation ||
 
 551             (ref_objectid != owner_objectid &&
 
 552              ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
 
 562 static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
 
 563                                           struct btrfs_root *root,
 
 564                                           struct btrfs_path *path,
 
 565                                           u64 bytenr, u64 parent,
 
 566                                           u64 ref_root, u64 ref_generation,
 
 569         struct btrfs_key key;
 
 570         struct extent_buffer *leaf;
 
 571         struct btrfs_extent_ref *ref;
 
 575         key.objectid = bytenr;
 
 576         key.type = BTRFS_EXTENT_REF_KEY;
 
 579         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
 
 581                 leaf = path->nodes[0];
 
 582                 ref = btrfs_item_ptr(leaf, path->slots[0],
 
 583                                      struct btrfs_extent_ref);
 
 584                 btrfs_set_ref_root(leaf, ref, ref_root);
 
 585                 btrfs_set_ref_generation(leaf, ref, ref_generation);
 
 586                 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
 
 587                 btrfs_set_ref_num_refs(leaf, ref, 1);
 
 588         } else if (ret == -EEXIST) {
 
 590                 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
 
 591                 leaf = path->nodes[0];
 
 592                 ref = btrfs_item_ptr(leaf, path->slots[0],
 
 593                                      struct btrfs_extent_ref);
 
 594                 if (btrfs_ref_root(leaf, ref) != ref_root ||
 
 595                     btrfs_ref_generation(leaf, ref) != ref_generation) {
 
 601                 num_refs = btrfs_ref_num_refs(leaf, ref);
 
 602                 BUG_ON(num_refs == 0);
 
 603                 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
 
 605                 existing_owner = btrfs_ref_objectid(leaf, ref);
 
 606                 if (existing_owner != owner_objectid &&
 
 607                     existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
 
 608                         btrfs_set_ref_objectid(leaf, ref,
 
 609                                         BTRFS_MULTIPLE_OBJECTIDS);
 
 615         btrfs_mark_buffer_dirty(path->nodes[0]);
 
 617         btrfs_release_path(root, path);
 
 621 static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
 
 622                                           struct btrfs_root *root,
 
 623                                           struct btrfs_path *path)
 
 625         struct extent_buffer *leaf;
 
 626         struct btrfs_extent_ref *ref;
 
 630         leaf = path->nodes[0];
 
 631         ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
 
 632         num_refs = btrfs_ref_num_refs(leaf, ref);
 
 633         BUG_ON(num_refs == 0);
 
 636                 ret = btrfs_del_item(trans, root, path);
 
 638                 btrfs_set_ref_num_refs(leaf, ref, num_refs);
 
 639                 btrfs_mark_buffer_dirty(leaf);
 
 641         btrfs_release_path(root, path);
 
 645 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
 
 646                                      struct btrfs_root *root, u64 bytenr,
 
 647                                      u64 orig_parent, u64 parent,
 
 648                                      u64 orig_root, u64 ref_root,
 
 649                                      u64 orig_generation, u64 ref_generation,
 
 653         struct btrfs_root *extent_root = root->fs_info->extent_root;
 
 654         struct btrfs_path *path;
 
 656         if (root == root->fs_info->extent_root) {
 
 657                 struct pending_extent_op *extent_op;
 
 660                 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
 
 661                 num_bytes = btrfs_level_size(root, (int)owner_objectid);
 
 662                 mutex_lock(&root->fs_info->extent_ins_mutex);
 
 663                 if (test_range_bit(&root->fs_info->extent_ins, bytenr,
 
 664                                 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) {
 
 666                         ret = get_state_private(&root->fs_info->extent_ins,
 
 669                         extent_op = (struct pending_extent_op *)
 
 671                         BUG_ON(extent_op->parent != orig_parent);
 
 672                         BUG_ON(extent_op->generation != orig_generation);
 
 674                         extent_op->parent = parent;
 
 675                         extent_op->generation = ref_generation;
 
 677                         extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
 
 680                         extent_op->type = PENDING_BACKREF_UPDATE;
 
 681                         extent_op->bytenr = bytenr;
 
 682                         extent_op->num_bytes = num_bytes;
 
 683                         extent_op->parent = parent;
 
 684                         extent_op->orig_parent = orig_parent;
 
 685                         extent_op->generation = ref_generation;
 
 686                         extent_op->orig_generation = orig_generation;
 
 687                         extent_op->level = (int)owner_objectid;
 
 689                         set_extent_bits(&root->fs_info->extent_ins,
 
 690                                         bytenr, bytenr + num_bytes - 1,
 
 691                                         EXTENT_WRITEBACK, GFP_NOFS);
 
 692                         set_state_private(&root->fs_info->extent_ins,
 
 693                                           bytenr, (unsigned long)extent_op);
 
 695                 mutex_unlock(&root->fs_info->extent_ins_mutex);
 
 699         path = btrfs_alloc_path();
 
 702         ret = lookup_extent_backref(trans, extent_root, path,
 
 703                                     bytenr, orig_parent, orig_root,
 
 704                                     orig_generation, owner_objectid, 1);
 
 707         ret = remove_extent_backref(trans, extent_root, path);
 
 710         ret = insert_extent_backref(trans, extent_root, path, bytenr,
 
 711                                     parent, ref_root, ref_generation,
 
 714         finish_current_insert(trans, extent_root, 0);
 
 715         del_pending_extents(trans, extent_root, 0);
 
 717         btrfs_free_path(path);
 
 721 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
 
 722                             struct btrfs_root *root, u64 bytenr,
 
 723                             u64 orig_parent, u64 parent,
 
 724                             u64 ref_root, u64 ref_generation,
 
 728         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
 
 729             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
 
 731         ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
 
 732                                         parent, ref_root, ref_root,
 
 733                                         ref_generation, ref_generation,
 
 738 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 739                                   struct btrfs_root *root, u64 bytenr,
 
 740                                   u64 orig_parent, u64 parent,
 
 741                                   u64 orig_root, u64 ref_root,
 
 742                                   u64 orig_generation, u64 ref_generation,
 
 745         struct btrfs_path *path;
 
 747         struct btrfs_key key;
 
 748         struct extent_buffer *l;
 
 749         struct btrfs_extent_item *item;
 
 752         path = btrfs_alloc_path();
 
 757         key.objectid = bytenr;
 
 758         key.type = BTRFS_EXTENT_ITEM_KEY;
 
 759         key.offset = (u64)-1;
 
 761         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
 
 765         BUG_ON(ret == 0 || path->slots[0] == 0);
 
 770         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
 771         if (key.objectid != bytenr) {
 
 772                 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
 
 773                 printk("wanted %Lu found %Lu\n", bytenr, key.objectid);
 
 776         BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
 
 778         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
 
 779         refs = btrfs_extent_refs(l, item);
 
 780         btrfs_set_extent_refs(l, item, refs + 1);
 
 781         btrfs_mark_buffer_dirty(path->nodes[0]);
 
 783         btrfs_release_path(root->fs_info->extent_root, path);
 
 786         ret = insert_extent_backref(trans, root->fs_info->extent_root,
 
 787                                     path, bytenr, parent,
 
 788                                     ref_root, ref_generation,
 
 791         finish_current_insert(trans, root->fs_info->extent_root, 0);
 
 792         del_pending_extents(trans, root->fs_info->extent_root, 0);
 
 794         btrfs_free_path(path);
 
 798 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 799                          struct btrfs_root *root,
 
 800                          u64 bytenr, u64 num_bytes, u64 parent,
 
 801                          u64 ref_root, u64 ref_generation,
 
 805         if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
 
 806             owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
 
 808         ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
 
 809                                      0, ref_root, 0, ref_generation,
 
 814 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 
 815                          struct btrfs_root *root)
 
 817         finish_current_insert(trans, root->fs_info->extent_root, 1);
 
 818         del_pending_extents(trans, root->fs_info->extent_root, 1);
 
 822 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
 
 823                             struct btrfs_root *root, u64 bytenr,
 
 824                             u64 num_bytes, u32 *refs)
 
 826         struct btrfs_path *path;
 
 828         struct btrfs_key key;
 
 829         struct extent_buffer *l;
 
 830         struct btrfs_extent_item *item;
 
 832         WARN_ON(num_bytes < root->sectorsize);
 
 833         path = btrfs_alloc_path();
 
 835         key.objectid = bytenr;
 
 836         key.offset = num_bytes;
 
 837         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 
 838         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
 
 843                 btrfs_print_leaf(root, path->nodes[0]);
 
 844                 printk("failed to find block number %Lu\n", bytenr);
 
 848         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
 
 849         *refs = btrfs_extent_refs(l, item);
 
 851         btrfs_free_path(path);
 
 855 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 
 856                           struct btrfs_root *root, u64 bytenr)
 
 858         struct btrfs_root *extent_root = root->fs_info->extent_root;
 
 859         struct btrfs_path *path;
 
 860         struct extent_buffer *leaf;
 
 861         struct btrfs_extent_ref *ref_item;
 
 862         struct btrfs_key key;
 
 863         struct btrfs_key found_key;
 
 869         key.objectid = bytenr;
 
 870         key.offset = (u64)-1;
 
 871         key.type = BTRFS_EXTENT_ITEM_KEY;
 
 873         path = btrfs_alloc_path();
 
 874         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
 
 880         if (path->slots[0] == 0)
 
 884         leaf = path->nodes[0];
 
 885         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
 887         if (found_key.objectid != bytenr ||
 
 888             found_key.type != BTRFS_EXTENT_ITEM_KEY)
 
 891         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 
 893                 leaf = path->nodes[0];
 
 894                 nritems = btrfs_header_nritems(leaf);
 
 895                 if (path->slots[0] >= nritems) {
 
 896                         ret = btrfs_next_leaf(extent_root, path);
 
 903                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
 904                 if (found_key.objectid != bytenr)
 
 907                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
 
 912                 ref_item = btrfs_item_ptr(leaf, path->slots[0],
 
 913                                           struct btrfs_extent_ref);
 
 914                 ref_root = btrfs_ref_root(leaf, ref_item);
 
 915                 if (ref_root != root->root_key.objectid &&
 
 916                     ref_root != BTRFS_TREE_LOG_OBJECTID) {
 
 920                 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
 
 929         btrfs_free_path(path);
 
 933 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
 934                     struct extent_buffer *buf, u32 nr_extents)
 
 936         struct btrfs_key key;
 
 937         struct btrfs_file_extent_item *fi;
 
 948         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
 
 950                 root_gen = root->root_key.offset;
 
 953                 root_gen = trans->transid - 1;
 
 956         level = btrfs_header_level(buf);
 
 957         nritems = btrfs_header_nritems(buf);
 
 960                 struct btrfs_leaf_ref *ref;
 
 961                 struct btrfs_extent_info *info;
 
 963                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
 
 969                 ref->root_gen = root_gen;
 
 970                 ref->bytenr = buf->start;
 
 971                 ref->owner = btrfs_header_owner(buf);
 
 972                 ref->generation = btrfs_header_generation(buf);
 
 973                 ref->nritems = nr_extents;
 
 976                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
 
 978                         btrfs_item_key_to_cpu(buf, &key, i);
 
 979                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 
 981                         fi = btrfs_item_ptr(buf, i,
 
 982                                             struct btrfs_file_extent_item);
 
 983                         if (btrfs_file_extent_type(buf, fi) ==
 
 984                             BTRFS_FILE_EXTENT_INLINE)
 
 986                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
 
 987                         if (disk_bytenr == 0)
 
 990                         info->bytenr = disk_bytenr;
 
 992                                 btrfs_file_extent_disk_num_bytes(buf, fi);
 
 993                         info->objectid = key.objectid;
 
 994                         info->offset = key.offset;
 
 998                 ret = btrfs_add_leaf_ref(root, ref, shared);
 
 999                 if (ret == -EEXIST && shared) {
 
1000                         struct btrfs_leaf_ref *old;
 
1001                         old = btrfs_lookup_leaf_ref(root, ref->bytenr);
 
1003                         btrfs_remove_leaf_ref(root, old);
 
1004                         btrfs_free_leaf_ref(root, old);
 
1005                         ret = btrfs_add_leaf_ref(root, ref, shared);
 
1008                 btrfs_free_leaf_ref(root, ref);
 
1014 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
1015                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
 
1022         u64 orig_generation;
 
1024         u32 nr_file_extents = 0;
 
1025         struct btrfs_key key;
 
1026         struct btrfs_file_extent_item *fi;
 
1031         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
 
1032                             u64, u64, u64, u64, u64, u64, u64, u64);
 
1034         ref_root = btrfs_header_owner(buf);
 
1035         ref_generation = btrfs_header_generation(buf);
 
1036         orig_root = btrfs_header_owner(orig_buf);
 
1037         orig_generation = btrfs_header_generation(orig_buf);
 
1039         nritems = btrfs_header_nritems(buf);
 
1040         level = btrfs_header_level(buf);
 
1042         if (root->ref_cows) {
 
1043                 process_func = __btrfs_inc_extent_ref;
 
1046                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
 
1049                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
 
1051                 process_func = __btrfs_update_extent_ref;
 
1054         for (i = 0; i < nritems; i++) {
 
1057                         btrfs_item_key_to_cpu(buf, &key, i);
 
1058                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 
1060                         fi = btrfs_item_ptr(buf, i,
 
1061                                             struct btrfs_file_extent_item);
 
1062                         if (btrfs_file_extent_type(buf, fi) ==
 
1063                             BTRFS_FILE_EXTENT_INLINE)
 
1065                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
 
1071                         ret = process_func(trans, root, bytenr,
 
1072                                            orig_buf->start, buf->start,
 
1073                                            orig_root, ref_root,
 
1074                                            orig_generation, ref_generation,
 
1083                         bytenr = btrfs_node_blockptr(buf, i);
 
1084                         ret = process_func(trans, root, bytenr,
 
1085                                            orig_buf->start, buf->start,
 
1086                                            orig_root, ref_root,
 
1087                                            orig_generation, ref_generation,
 
1099                         *nr_extents = nr_file_extents;
 
1101                         *nr_extents = nritems;
 
1109 int btrfs_update_ref(struct btrfs_trans_handle *trans,
 
1110                      struct btrfs_root *root, struct extent_buffer *orig_buf,
 
1111                      struct extent_buffer *buf, int start_slot, int nr)
 
1118         u64 orig_generation;
 
1119         struct btrfs_key key;
 
1120         struct btrfs_file_extent_item *fi;
 
1126         BUG_ON(start_slot < 0);
 
1127         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
 
1129         ref_root = btrfs_header_owner(buf);
 
1130         ref_generation = btrfs_header_generation(buf);
 
1131         orig_root = btrfs_header_owner(orig_buf);
 
1132         orig_generation = btrfs_header_generation(orig_buf);
 
1133         level = btrfs_header_level(buf);
 
1135         if (!root->ref_cows) {
 
1137                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
 
1140                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
 
1144         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
 
1147                         btrfs_item_key_to_cpu(buf, &key, slot);
 
1148                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 
1150                         fi = btrfs_item_ptr(buf, slot,
 
1151                                             struct btrfs_file_extent_item);
 
1152                         if (btrfs_file_extent_type(buf, fi) ==
 
1153                             BTRFS_FILE_EXTENT_INLINE)
 
1155                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
 
1158                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
 
1159                                             orig_buf->start, buf->start,
 
1160                                             orig_root, ref_root,
 
1161                                             orig_generation, ref_generation,
 
1166                         bytenr = btrfs_node_blockptr(buf, slot);
 
1167                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
 
1168                                             orig_buf->start, buf->start,
 
1169                                             orig_root, ref_root,
 
1170                                             orig_generation, ref_generation,
 
1182 static int write_one_cache_group(struct btrfs_trans_handle *trans,
 
1183                                  struct btrfs_root *root,
 
1184                                  struct btrfs_path *path,
 
1185                                  struct btrfs_block_group_cache *cache)
 
1189         struct btrfs_root *extent_root = root->fs_info->extent_root;
 
1191         struct extent_buffer *leaf;
 
1193         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
 
1198         leaf = path->nodes[0];
 
1199         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
 
1200         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
 
1201         btrfs_mark_buffer_dirty(leaf);
 
1202         btrfs_release_path(extent_root, path);
 
1204         finish_current_insert(trans, extent_root, 0);
 
1205         pending_ret = del_pending_extents(trans, extent_root, 0);
 
1214 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
1215                                    struct btrfs_root *root)
 
1217         struct btrfs_block_group_cache *cache, *entry;
 
1221         struct btrfs_path *path;
 
1224         path = btrfs_alloc_path();
 
1230                 spin_lock(&root->fs_info->block_group_cache_lock);
 
1231                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
 
1232                      n; n = rb_next(n)) {
 
1233                         entry = rb_entry(n, struct btrfs_block_group_cache,
 
1240                 spin_unlock(&root->fs_info->block_group_cache_lock);
 
1246                 last += cache->key.offset;
 
1248                 err = write_one_cache_group(trans, root,
 
1251                  * if we fail to write the cache group, we want
 
1252                  * to keep it marked dirty in hopes that a later
 
1260         btrfs_free_path(path);
 
1264 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 
1265                              u64 total_bytes, u64 bytes_used,
 
1266                              struct btrfs_space_info **space_info)
 
1268         struct btrfs_space_info *found;
 
1270         found = __find_space_info(info, flags);
 
1272                 spin_lock(&found->lock);
 
1273                 found->total_bytes += total_bytes;
 
1274                 found->bytes_used += bytes_used;
 
1276                 spin_unlock(&found->lock);
 
1277                 *space_info = found;
 
1280         found = kmalloc(sizeof(*found), GFP_NOFS);
 
1284         list_add(&found->list, &info->space_info);
 
1285         INIT_LIST_HEAD(&found->block_groups);
 
1286         init_rwsem(&found->groups_sem);
 
1287         spin_lock_init(&found->lock);
 
1288         found->flags = flags;
 
1289         found->total_bytes = total_bytes;
 
1290         found->bytes_used = bytes_used;
 
1291         found->bytes_pinned = 0;
 
1292         found->bytes_reserved = 0;
 
1294         found->force_alloc = 0;
 
1295         *space_info = found;
 
1299 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
1301         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
 
1302                                    BTRFS_BLOCK_GROUP_RAID1 |
 
1303                                    BTRFS_BLOCK_GROUP_RAID10 |
 
1304                                    BTRFS_BLOCK_GROUP_DUP);
 
1306                 if (flags & BTRFS_BLOCK_GROUP_DATA)
 
1307                         fs_info->avail_data_alloc_bits |= extra_flags;
 
1308                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
 
1309                         fs_info->avail_metadata_alloc_bits |= extra_flags;
 
1310                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 
1311                         fs_info->avail_system_alloc_bits |= extra_flags;
 
1315 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 
1317         u64 num_devices = root->fs_info->fs_devices->num_devices;
 
1319         if (num_devices == 1)
 
1320                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
 
1321         if (num_devices < 4)
 
1322                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
 
1324         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
 
1325             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
 
1326                       BTRFS_BLOCK_GROUP_RAID10))) {
 
1327                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
 
1330         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
 
1331             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
 
1332                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
 
1335         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
 
1336             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
 
1337              (flags & BTRFS_BLOCK_GROUP_RAID10) |
 
1338              (flags & BTRFS_BLOCK_GROUP_DUP)))
 
1339                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
 
1343 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 
1344                           struct btrfs_root *extent_root, u64 alloc_bytes,
 
1345                           u64 flags, int force)
 
1347         struct btrfs_space_info *space_info;
 
1351         int ret = 0, waited = 0;
 
1353         flags = reduce_alloc_profile(extent_root, flags);
 
1355         space_info = __find_space_info(extent_root->fs_info, flags);
 
1357                 ret = update_space_info(extent_root->fs_info, flags,
 
1361         BUG_ON(!space_info);
 
1363         spin_lock(&space_info->lock);
 
1364         if (space_info->force_alloc) {
 
1366                 space_info->force_alloc = 0;
 
1368         if (space_info->full) {
 
1369                 spin_unlock(&space_info->lock);
 
1373         thresh = div_factor(space_info->total_bytes, 6);
 
1375            (space_info->bytes_used + space_info->bytes_pinned +
 
1376             space_info->bytes_reserved + alloc_bytes) < thresh) {
 
1377                 spin_unlock(&space_info->lock);
 
1381         spin_unlock(&space_info->lock);
 
1383         ret = mutex_trylock(&extent_root->fs_info->chunk_mutex);
 
1384         if (!ret && !force) {
 
1387                 mutex_lock(&extent_root->fs_info->chunk_mutex);
 
1392                 spin_lock(&space_info->lock);
 
1393                 if (space_info->full) {
 
1394                         spin_unlock(&space_info->lock);
 
1397                 spin_unlock(&space_info->lock);
 
1400         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
 
1402 printk("space info full %Lu\n", flags);
 
1403                 space_info->full = 1;
 
1407         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
 
1408                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
 
1411         mutex_unlock(&extent_root->fs_info->chunk_mutex);
 
1416 static int update_block_group(struct btrfs_trans_handle *trans,
 
1417                               struct btrfs_root *root,
 
1418                               u64 bytenr, u64 num_bytes, int alloc,
 
1421         struct btrfs_block_group_cache *cache;
 
1422         struct btrfs_fs_info *info = root->fs_info;
 
1423         u64 total = num_bytes;
 
1428                 cache = btrfs_lookup_block_group(info, bytenr);
 
1432                 byte_in_group = bytenr - cache->key.objectid;
 
1433                 WARN_ON(byte_in_group > cache->key.offset);
 
1435                 spin_lock(&cache->space_info->lock);
 
1436                 spin_lock(&cache->lock);
 
1438                 old_val = btrfs_block_group_used(&cache->item);
 
1439                 num_bytes = min(total, cache->key.offset - byte_in_group);
 
1441                         old_val += num_bytes;
 
1442                         cache->space_info->bytes_used += num_bytes;
 
1443                         btrfs_set_block_group_used(&cache->item, old_val);
 
1444                         spin_unlock(&cache->lock);
 
1445                         spin_unlock(&cache->space_info->lock);
 
1447                         old_val -= num_bytes;
 
1448                         cache->space_info->bytes_used -= num_bytes;
 
1449                         btrfs_set_block_group_used(&cache->item, old_val);
 
1450                         spin_unlock(&cache->lock);
 
1451                         spin_unlock(&cache->space_info->lock);
 
1454                                 ret = btrfs_add_free_space(cache, bytenr,
 
1461                 bytenr += num_bytes;
 
1466 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 
1468         struct btrfs_block_group_cache *cache;
 
1470         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
 
1474         return cache->key.objectid;
 
1477 int btrfs_update_pinned_extents(struct btrfs_root *root,
 
1478                                 u64 bytenr, u64 num, int pin)
 
1481         struct btrfs_block_group_cache *cache;
 
1482         struct btrfs_fs_info *fs_info = root->fs_info;
 
1484         WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
 
1486                 set_extent_dirty(&fs_info->pinned_extents,
 
1487                                 bytenr, bytenr + num - 1, GFP_NOFS);
 
1489                 clear_extent_dirty(&fs_info->pinned_extents,
 
1490                                 bytenr, bytenr + num - 1, GFP_NOFS);
 
1493                 cache = btrfs_lookup_block_group(fs_info, bytenr);
 
1495                 len = min(num, cache->key.offset -
 
1496                           (bytenr - cache->key.objectid));
 
1498                         spin_lock(&cache->space_info->lock);
 
1499                         spin_lock(&cache->lock);
 
1500                         cache->pinned += len;
 
1501                         cache->space_info->bytes_pinned += len;
 
1502                         spin_unlock(&cache->lock);
 
1503                         spin_unlock(&cache->space_info->lock);
 
1504                         fs_info->total_pinned += len;
 
1506                         spin_lock(&cache->space_info->lock);
 
1507                         spin_lock(&cache->lock);
 
1508                         cache->pinned -= len;
 
1509                         cache->space_info->bytes_pinned -= len;
 
1510                         spin_unlock(&cache->lock);
 
1511                         spin_unlock(&cache->space_info->lock);
 
1512                         fs_info->total_pinned -= len;
 
1520 static int update_reserved_extents(struct btrfs_root *root,
 
1521                                    u64 bytenr, u64 num, int reserve)
 
1524         struct btrfs_block_group_cache *cache;
 
1525         struct btrfs_fs_info *fs_info = root->fs_info;
 
1528                 cache = btrfs_lookup_block_group(fs_info, bytenr);
 
1530                 len = min(num, cache->key.offset -
 
1531                           (bytenr - cache->key.objectid));
 
1533                 spin_lock(&cache->space_info->lock);
 
1534                 spin_lock(&cache->lock);
 
1536                         cache->reserved += len;
 
1537                         cache->space_info->bytes_reserved += len;
 
1539                         cache->reserved -= len;
 
1540                         cache->space_info->bytes_reserved -= len;
 
1542                 spin_unlock(&cache->lock);
 
1543                 spin_unlock(&cache->space_info->lock);
 
1550 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 
1555         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
 
1558         mutex_lock(&root->fs_info->pinned_mutex);
 
1560                 ret = find_first_extent_bit(pinned_extents, last,
 
1561                                             &start, &end, EXTENT_DIRTY);
 
1564                 set_extent_dirty(copy, start, end, GFP_NOFS);
 
1567         mutex_unlock(&root->fs_info->pinned_mutex);
 
1571 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
1572                                struct btrfs_root *root,
 
1573                                struct extent_io_tree *unpin)
 
1578         struct btrfs_block_group_cache *cache;
 
1580         mutex_lock(&root->fs_info->pinned_mutex);
 
1582                 ret = find_first_extent_bit(unpin, 0, &start, &end,
 
1586                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
 
1587                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
 
1588                 cache = btrfs_lookup_block_group(root->fs_info, start);
 
1590                         btrfs_add_free_space(cache, start, end - start + 1);
 
1591                 if (need_resched()) {
 
1592                         mutex_unlock(&root->fs_info->pinned_mutex);
 
1594                         mutex_lock(&root->fs_info->pinned_mutex);
 
1597         mutex_unlock(&root->fs_info->pinned_mutex);
 
1601 static int finish_current_insert(struct btrfs_trans_handle *trans,
 
1602                                  struct btrfs_root *extent_root, int all)
 
1608         struct btrfs_fs_info *info = extent_root->fs_info;
 
1609         struct btrfs_path *path;
 
1610         struct btrfs_extent_ref *ref;
 
1611         struct pending_extent_op *extent_op;
 
1612         struct btrfs_key key;
 
1613         struct btrfs_extent_item extent_item;
 
1617         btrfs_set_stack_extent_refs(&extent_item, 1);
 
1618         path = btrfs_alloc_path();
 
1621                 mutex_lock(&info->extent_ins_mutex);
 
1622                 ret = find_first_extent_bit(&info->extent_ins, search, &start,
 
1623                                             &end, EXTENT_WRITEBACK);
 
1625                         mutex_unlock(&info->extent_ins_mutex);
 
1626                         if (search && all) {
 
1633                 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
 
1636                         mutex_unlock(&info->extent_ins_mutex);
 
1642                 ret = get_state_private(&info->extent_ins, start, &priv);
 
1644                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
 
1646                 mutex_unlock(&info->extent_ins_mutex);
 
1648                 if (extent_op->type == PENDING_EXTENT_INSERT) {
 
1649                         key.objectid = start;
 
1650                         key.offset = end + 1 - start;
 
1651                         key.type = BTRFS_EXTENT_ITEM_KEY;
 
1652                         err = btrfs_insert_item(trans, extent_root, &key,
 
1653                                         &extent_item, sizeof(extent_item));
 
1656                         mutex_lock(&info->extent_ins_mutex);
 
1657                         clear_extent_bits(&info->extent_ins, start, end,
 
1658                                           EXTENT_WRITEBACK, GFP_NOFS);
 
1659                         mutex_unlock(&info->extent_ins_mutex);
 
1661                         err = insert_extent_backref(trans, extent_root, path,
 
1662                                                 start, extent_op->parent,
 
1663                                                 extent_root->root_key.objectid,
 
1664                                                 extent_op->generation,
 
1667                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
 
1668                         err = lookup_extent_backref(trans, extent_root, path,
 
1669                                                 start, extent_op->orig_parent,
 
1670                                                 extent_root->root_key.objectid,
 
1671                                                 extent_op->orig_generation,
 
1672                                                 extent_op->level, 0);
 
1675                         mutex_lock(&info->extent_ins_mutex);
 
1676                         clear_extent_bits(&info->extent_ins, start, end,
 
1677                                           EXTENT_WRITEBACK, GFP_NOFS);
 
1678                         mutex_unlock(&info->extent_ins_mutex);
 
1680                         key.objectid = start;
 
1681                         key.offset = extent_op->parent;
 
1682                         key.type = BTRFS_EXTENT_REF_KEY;
 
1683                         err = btrfs_set_item_key_safe(trans, extent_root, path,
 
1686                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 
1687                                              struct btrfs_extent_ref);
 
1688                         btrfs_set_ref_generation(path->nodes[0], ref,
 
1689                                                  extent_op->generation);
 
1690                         btrfs_mark_buffer_dirty(path->nodes[0]);
 
1691                         btrfs_release_path(extent_root, path);
 
1696                 unlock_extent(&info->extent_ins, start, end, GFP_NOFS);
 
1704         btrfs_free_path(path);
 
1708 static int pin_down_bytes(struct btrfs_trans_handle *trans,
 
1709                           struct btrfs_root *root,
 
1710                           u64 bytenr, u64 num_bytes, int is_data)
 
1713         struct extent_buffer *buf;
 
1718         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
 
1722         /* we can reuse a block if it hasn't been written
 
1723          * and it is from this transaction.  We can't
 
1724          * reuse anything from the tree log root because
 
1725          * it has tiny sub-transactions.
 
1727         if (btrfs_buffer_uptodate(buf, 0) &&
 
1728             btrfs_try_tree_lock(buf)) {
 
1729                 u64 header_owner = btrfs_header_owner(buf);
 
1730                 u64 header_transid = btrfs_header_generation(buf);
 
1731                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
 
1732                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
 
1733                     header_transid == trans->transid &&
 
1734                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
 
1735                         clean_tree_block(NULL, root, buf);
 
1736                         btrfs_tree_unlock(buf);
 
1737                         free_extent_buffer(buf);
 
1740                 btrfs_tree_unlock(buf);
 
1742         free_extent_buffer(buf);
 
1744         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
 
1751  * remove an extent from the root, returns 0 on success
 
1753 static int __free_extent(struct btrfs_trans_handle *trans,
 
1754                          struct btrfs_root *root,
 
1755                          u64 bytenr, u64 num_bytes, u64 parent,
 
1756                          u64 root_objectid, u64 ref_generation,
 
1757                          u64 owner_objectid, int pin, int mark_free)
 
1759         struct btrfs_path *path;
 
1760         struct btrfs_key key;
 
1761         struct btrfs_fs_info *info = root->fs_info;
 
1762         struct btrfs_root *extent_root = info->extent_root;
 
1763         struct extent_buffer *leaf;
 
1765         int extent_slot = 0;
 
1766         int found_extent = 0;
 
1768         struct btrfs_extent_item *ei;
 
1771         key.objectid = bytenr;
 
1772         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 
1773         key.offset = num_bytes;
 
1774         path = btrfs_alloc_path();
 
1779         ret = lookup_extent_backref(trans, extent_root, path,
 
1780                                     bytenr, parent, root_objectid,
 
1781                                     ref_generation, owner_objectid, 1);
 
1783                 struct btrfs_key found_key;
 
1784                 extent_slot = path->slots[0];
 
1785                 while(extent_slot > 0) {
 
1787                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
 
1789                         if (found_key.objectid != bytenr)
 
1791                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
 
1792                             found_key.offset == num_bytes) {
 
1796                         if (path->slots[0] - extent_slot > 5)
 
1799                 if (!found_extent) {
 
1800                         ret = remove_extent_backref(trans, extent_root, path);
 
1802                         btrfs_release_path(extent_root, path);
 
1803                         ret = btrfs_search_slot(trans, extent_root,
 
1806                         extent_slot = path->slots[0];
 
1809                 btrfs_print_leaf(extent_root, path->nodes[0]);
 
1811                 printk("Unable to find ref byte nr %Lu root %Lu "
 
1812                        "gen %Lu owner %Lu\n", bytenr,
 
1813                        root_objectid, ref_generation, owner_objectid);
 
1816         leaf = path->nodes[0];
 
1817         ei = btrfs_item_ptr(leaf, extent_slot,
 
1818                             struct btrfs_extent_item);
 
1819         refs = btrfs_extent_refs(leaf, ei);
 
1822         btrfs_set_extent_refs(leaf, ei, refs);
 
1824         btrfs_mark_buffer_dirty(leaf);
 
1826         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
 
1827                 struct btrfs_extent_ref *ref;
 
1828                 ref = btrfs_item_ptr(leaf, path->slots[0],
 
1829                                      struct btrfs_extent_ref);
 
1830                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
 
1831                 /* if the back ref and the extent are next to each other
 
1832                  * they get deleted below in one shot
 
1834                 path->slots[0] = extent_slot;
 
1836         } else if (found_extent) {
 
1837                 /* otherwise delete the extent back ref */
 
1838                 ret = remove_extent_backref(trans, extent_root, path);
 
1840                 /* if refs are 0, we need to setup the path for deletion */
 
1842                         btrfs_release_path(extent_root, path);
 
1843                         ret = btrfs_search_slot(trans, extent_root, &key, path,
 
1852 #ifdef BIO_RW_DISCARD
 
1853                 u64 map_length = num_bytes;
 
1854                 struct btrfs_multi_bio *multi = NULL;
 
1858                         mutex_lock(&root->fs_info->pinned_mutex);
 
1859                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
 
1860                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
 
1861                         mutex_unlock(&root->fs_info->pinned_mutex);
 
1867                 /* block accounting for super block */
 
1868                 spin_lock_irq(&info->delalloc_lock);
 
1869                 super_used = btrfs_super_bytes_used(&info->super_copy);
 
1870                 btrfs_set_super_bytes_used(&info->super_copy,
 
1871                                            super_used - num_bytes);
 
1872                 spin_unlock_irq(&info->delalloc_lock);
 
1874                 /* block accounting for root item */
 
1875                 root_used = btrfs_root_used(&root->root_item);
 
1876                 btrfs_set_root_used(&root->root_item,
 
1877                                            root_used - num_bytes);
 
1878                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
 
1881                 btrfs_release_path(extent_root, path);
 
1882                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
 
1886 #ifdef BIO_RW_DISCARD
 
1887                 /* Tell the block device(s) that the sectors can be discarded */
 
1888                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
 
1889                                       bytenr, &map_length, &multi, 0);
 
1891                         struct btrfs_bio_stripe *stripe = multi->stripes;
 
1894                         if (map_length > num_bytes)
 
1895                                 map_length = num_bytes;
 
1897                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
 
1898                                 blkdev_issue_discard(stripe->dev->bdev,
 
1899                                                      stripe->physical >> 9,
 
1906         btrfs_free_path(path);
 
1907         finish_current_insert(trans, extent_root, 0);
 
1912  * find all the blocks marked as pending in the radix tree and remove
 
1913  * them from the extent map
 
1915 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
 
1916                                btrfs_root *extent_root, int all)
 
1924         struct extent_io_tree *pending_del;
 
1925         struct extent_io_tree *extent_ins;
 
1926         struct pending_extent_op *extent_op;
 
1927         struct btrfs_fs_info *info = extent_root->fs_info;
 
1929         extent_ins = &extent_root->fs_info->extent_ins;
 
1930         pending_del = &extent_root->fs_info->pending_del;
 
1933                 mutex_lock(&info->extent_ins_mutex);
 
1934                 ret = find_first_extent_bit(pending_del, search, &start, &end,
 
1937                         mutex_unlock(&info->extent_ins_mutex);
 
1938                         if (all && search) {
 
1945                 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS);
 
1948                         mutex_unlock(&info->extent_ins_mutex);
 
1954                 ret = get_state_private(pending_del, start, &priv);
 
1956                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
 
1958                 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK,
 
1960                 if (!test_range_bit(extent_ins, start, end,
 
1961                                     EXTENT_WRITEBACK, 0)) {
 
1962                         mutex_unlock(&info->extent_ins_mutex);
 
1964                         ret = __free_extent(trans, extent_root,
 
1965                                             start, end + 1 - start,
 
1966                                             extent_op->orig_parent,
 
1967                                             extent_root->root_key.objectid,
 
1968                                             extent_op->orig_generation,
 
1969                                             extent_op->level, 1, 0);
 
1974                         ret = get_state_private(&info->extent_ins, start,
 
1977                         extent_op = (struct pending_extent_op *)
 
1978                                                 (unsigned long)priv;
 
1980                         clear_extent_bits(&info->extent_ins, start, end,
 
1981                                           EXTENT_WRITEBACK, GFP_NOFS);
 
1983                         mutex_unlock(&info->extent_ins_mutex);
 
1985                         if (extent_op->type == PENDING_BACKREF_UPDATE)
 
1988                         mutex_lock(&extent_root->fs_info->pinned_mutex);
 
1989                         ret = pin_down_bytes(trans, extent_root, start,
 
1990                                              end + 1 - start, 0);
 
1991                         mutex_unlock(&extent_root->fs_info->pinned_mutex);
 
1993                         ret = update_block_group(trans, extent_root, start,
 
1994                                                 end + 1 - start, 0, ret > 0);
 
2001                 unlock_extent(extent_ins, start, end, GFP_NOFS);
 
2013  * remove an extent from the root, returns 0 on success
 
2015 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
2016                                struct btrfs_root *root,
 
2017                                u64 bytenr, u64 num_bytes, u64 parent,
 
2018                                u64 root_objectid, u64 ref_generation,
 
2019                                u64 owner_objectid, int pin)
 
2021         struct btrfs_root *extent_root = root->fs_info->extent_root;
 
2025         WARN_ON(num_bytes < root->sectorsize);
 
2026         if (root == extent_root) {
 
2027                 struct pending_extent_op *extent_op;
 
2029                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
 
2032                 extent_op->type = PENDING_EXTENT_DELETE;
 
2033                 extent_op->bytenr = bytenr;
 
2034                 extent_op->num_bytes = num_bytes;
 
2035                 extent_op->parent = parent;
 
2036                 extent_op->orig_parent = parent;
 
2037                 extent_op->generation = ref_generation;
 
2038                 extent_op->orig_generation = ref_generation;
 
2039                 extent_op->level = (int)owner_objectid;
 
2041                 mutex_lock(&root->fs_info->extent_ins_mutex);
 
2042                 set_extent_bits(&root->fs_info->pending_del,
 
2043                                 bytenr, bytenr + num_bytes - 1,
 
2044                                 EXTENT_WRITEBACK, GFP_NOFS);
 
2045                 set_state_private(&root->fs_info->pending_del,
 
2046                                   bytenr, (unsigned long)extent_op);
 
2047                 mutex_unlock(&root->fs_info->extent_ins_mutex);
 
2050         /* if metadata always pin */
 
2051         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 
2052                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
 
2053                         struct btrfs_block_group_cache *cache;
 
2055                         /* btrfs_free_reserved_extent */
 
2056                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
 
2058                         btrfs_add_free_space(cache, bytenr, num_bytes);
 
2059                         update_reserved_extents(root, bytenr, num_bytes, 0);
 
2065         /* if data pin when any transaction has committed this */
 
2066         if (ref_generation != trans->transid)
 
2069         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
 
2070                             root_objectid, ref_generation,
 
2071                             owner_objectid, pin, pin == 0);
 
2073         finish_current_insert(trans, root->fs_info->extent_root, 0);
 
2074         pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0);
 
2075         return ret ? ret : pending_ret;
 
2078 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 
2079                       struct btrfs_root *root,
 
2080                       u64 bytenr, u64 num_bytes, u64 parent,
 
2081                       u64 root_objectid, u64 ref_generation,
 
2082                       u64 owner_objectid, int pin)
 
2086         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
 
2087                                   root_objectid, ref_generation,
 
2088                                   owner_objectid, pin);
 
2092 static u64 stripe_align(struct btrfs_root *root, u64 val)
 
2094         u64 mask = ((u64)root->stripesize - 1);
 
2095         u64 ret = (val + mask) & ~mask;
 
2100  * walks the btree of allocated extents and find a hole of a given size.
 
2101  * The key ins is changed to record the hole:
 
2102  * ins->objectid == block start
 
2103  * ins->flags = BTRFS_EXTENT_ITEM_KEY
 
2104  * ins->offset == number of blocks
 
2105  * Any available blocks before search_start are skipped.
 
2107 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
 
2108                                      struct btrfs_root *orig_root,
 
2109                                      u64 num_bytes, u64 empty_size,
 
2110                                      u64 search_start, u64 search_end,
 
2111                                      u64 hint_byte, struct btrfs_key *ins,
 
2112                                      u64 exclude_start, u64 exclude_nr,
 
2116         struct btrfs_root * root = orig_root->fs_info->extent_root;
 
2117         u64 total_needed = num_bytes;
 
2118         u64 *last_ptr = NULL;
 
2119         u64 last_wanted = 0;
 
2120         struct btrfs_block_group_cache *block_group = NULL;
 
2121         int chunk_alloc_done = 0;
 
2122         int empty_cluster = 2 * 1024 * 1024;
 
2123         int allowed_chunk_alloc = 0;
 
2124         struct list_head *head = NULL, *cur = NULL;
 
2127         struct btrfs_space_info *space_info;
 
2129         WARN_ON(num_bytes < root->sectorsize);
 
2130         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
 
2134         if (orig_root->ref_cows || empty_size)
 
2135                 allowed_chunk_alloc = 1;
 
2137         if (data & BTRFS_BLOCK_GROUP_METADATA) {
 
2138                 last_ptr = &root->fs_info->last_alloc;
 
2139                 empty_cluster = 64 * 1024;
 
2142         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
 
2143                 last_ptr = &root->fs_info->last_data_alloc;
 
2147                         hint_byte = *last_ptr;
 
2148                         last_wanted = *last_ptr;
 
2150                         empty_size += empty_cluster;
 
2154         search_start = max(search_start, first_logical_byte(root, 0));
 
2155         search_start = max(search_start, hint_byte);
 
2157         if (last_wanted && search_start != last_wanted) {
 
2159                 empty_size += empty_cluster;
 
2162         total_needed += empty_size;
 
2163         block_group = btrfs_lookup_block_group(root->fs_info, search_start);
 
2165                 block_group = btrfs_lookup_first_block_group(root->fs_info,
 
2167         space_info = __find_space_info(root->fs_info, data);
 
2169         down_read(&space_info->groups_sem);
 
2171                 struct btrfs_free_space *free_space;
 
2173                  * the only way this happens if our hint points to a block
 
2174                  * group thats not of the proper type, while looping this
 
2175                  * should never happen
 
2181                         goto new_group_no_lock;
 
2183                 mutex_lock(&block_group->alloc_mutex);
 
2184                 if (unlikely(!block_group_bits(block_group, data)))
 
2187                 ret = cache_block_group(root, block_group);
 
2189                         mutex_unlock(&block_group->alloc_mutex);
 
2193                 if (block_group->ro)
 
2196                 free_space = btrfs_find_free_space(block_group, search_start,
 
2199                         u64 start = block_group->key.objectid;
 
2200                         u64 end = block_group->key.objectid +
 
2201                                 block_group->key.offset;
 
2203                         search_start = stripe_align(root, free_space->offset);
 
2205                         /* move on to the next group */
 
2206                         if (search_start + num_bytes >= search_end)
 
2209                         /* move on to the next group */
 
2210                         if (search_start + num_bytes > end)
 
2213                         if (last_wanted && search_start != last_wanted) {
 
2214                                 total_needed += empty_cluster;
 
2215                                 empty_size += empty_cluster;
 
2218                                  * if search_start is still in this block group
 
2219                                  * then we just re-search this block group
 
2221                                 if (search_start >= start &&
 
2222                                     search_start < end) {
 
2223                                         mutex_unlock(&block_group->alloc_mutex);
 
2227                                 /* else we go to the next block group */
 
2231                         if (exclude_nr > 0 &&
 
2232                             (search_start + num_bytes > exclude_start &&
 
2233                              search_start < exclude_start + exclude_nr)) {
 
2234                                 search_start = exclude_start + exclude_nr;
 
2236                                  * if search_start is still in this block group
 
2237                                  * then we just re-search this block group
 
2239                                 if (search_start >= start &&
 
2240                                     search_start < end) {
 
2241                                         mutex_unlock(&block_group->alloc_mutex);
 
2246                                 /* else we go to the next block group */
 
2250                         ins->objectid = search_start;
 
2251                         ins->offset = num_bytes;
 
2253                         btrfs_remove_free_space_lock(block_group, search_start,
 
2255                         /* we are all good, lets return */
 
2256                         mutex_unlock(&block_group->alloc_mutex);
 
2260                 mutex_unlock(&block_group->alloc_mutex);
 
2262                 /* don't try to compare new allocations against the
 
2263                  * last allocation any more
 
2268                  * Here's how this works.
 
2269                  * loop == 0: we were searching a block group via a hint
 
2270                  *              and didn't find anything, so we start at
 
2271                  *              the head of the block groups and keep searching
 
2272                  * loop == 1: we're searching through all of the block groups
 
2273                  *              if we hit the head again we have searched
 
2274                  *              all of the block groups for this space and we
 
2275                  *              need to try and allocate, if we cant error out.
 
2276                  * loop == 2: we allocated more space and are looping through
 
2277                  *              all of the block groups again.
 
2280                         head = &space_info->block_groups;
 
2283                 } else if (loop == 1 && cur == head) {
 
2286                         /* at this point we give up on the empty_size
 
2287                          * allocations and just try to allocate the min
 
2290                          * The extra_loop field was set if an empty_size
 
2291                          * allocation was attempted above, and if this
 
2292                          * is try we need to try the loop again without
 
2293                          * the additional empty_size.
 
2295                         total_needed -= empty_size;
 
2297                         keep_going = extra_loop;
 
2300                         if (allowed_chunk_alloc && !chunk_alloc_done) {
 
2301                                 up_read(&space_info->groups_sem);
 
2302                                 ret = do_chunk_alloc(trans, root, num_bytes +
 
2303                                                      2 * 1024 * 1024, data, 1);
 
2306                                 down_read(&space_info->groups_sem);
 
2307                                 head = &space_info->block_groups;
 
2309                                  * we've allocated a new chunk, keep
 
2313                                 chunk_alloc_done = 1;
 
2314                         } else if (!allowed_chunk_alloc) {
 
2315                                 space_info->force_alloc = 1;
 
2323                 } else if (cur == head) {
 
2327                 block_group = list_entry(cur, struct btrfs_block_group_cache,
 
2329                 search_start = block_group->key.objectid;
 
2333         /* we found what we needed */
 
2334         if (ins->objectid) {
 
2335                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
 
2336                         trans->block_group = block_group;
 
2339                         *last_ptr = ins->objectid + ins->offset;
 
2345         up_read(&space_info->groups_sem);
 
2349 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
 
2351         struct btrfs_block_group_cache *cache;
 
2352         struct list_head *l;
 
2354         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
 
2355                info->total_bytes - info->bytes_used - info->bytes_pinned -
 
2356                info->bytes_reserved, (info->full) ? "" : "not ");
 
2358         down_read(&info->groups_sem);
 
2359         list_for_each(l, &info->block_groups) {
 
2360                 cache = list_entry(l, struct btrfs_block_group_cache, list);
 
2361                 spin_lock(&cache->lock);
 
2362                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
 
2363                        "%Lu pinned %Lu reserved\n",
 
2364                        cache->key.objectid, cache->key.offset,
 
2365                        btrfs_block_group_used(&cache->item),
 
2366                        cache->pinned, cache->reserved);
 
2367                 btrfs_dump_free_space(cache, bytes);
 
2368                 spin_unlock(&cache->lock);
 
2370         up_read(&info->groups_sem);
 
2373 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 
2374                                   struct btrfs_root *root,
 
2375                                   u64 num_bytes, u64 min_alloc_size,
 
2376                                   u64 empty_size, u64 hint_byte,
 
2377                                   u64 search_end, struct btrfs_key *ins,
 
2381         u64 search_start = 0;
 
2383         struct btrfs_fs_info *info = root->fs_info;
 
2386                 alloc_profile = info->avail_data_alloc_bits &
 
2387                                 info->data_alloc_profile;
 
2388                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
 
2389         } else if (root == root->fs_info->chunk_root) {
 
2390                 alloc_profile = info->avail_system_alloc_bits &
 
2391                                 info->system_alloc_profile;
 
2392                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
 
2394                 alloc_profile = info->avail_metadata_alloc_bits &
 
2395                                 info->metadata_alloc_profile;
 
2396                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
 
2399         data = reduce_alloc_profile(root, data);
 
2401          * the only place that sets empty_size is btrfs_realloc_node, which
 
2402          * is not called recursively on allocations
 
2404         if (empty_size || root->ref_cows) {
 
2405                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
 
2406                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 
2408                                      BTRFS_BLOCK_GROUP_METADATA |
 
2409                                      (info->metadata_alloc_profile &
 
2410                                       info->avail_metadata_alloc_bits), 0);
 
2412                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 
2413                                      num_bytes + 2 * 1024 * 1024, data, 0);
 
2416         WARN_ON(num_bytes < root->sectorsize);
 
2417         ret = find_free_extent(trans, root, num_bytes, empty_size,
 
2418                                search_start, search_end, hint_byte, ins,
 
2419                                trans->alloc_exclude_start,
 
2420                                trans->alloc_exclude_nr, data);
 
2422         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
 
2423                 num_bytes = num_bytes >> 1;
 
2424                 num_bytes = num_bytes & ~(root->sectorsize - 1);
 
2425                 num_bytes = max(num_bytes, min_alloc_size);
 
2426                 do_chunk_alloc(trans, root->fs_info->extent_root,
 
2427                                num_bytes, data, 1);
 
2431                 struct btrfs_space_info *sinfo;
 
2433                 sinfo = __find_space_info(root->fs_info, data);
 
2434                 printk("allocation failed flags %Lu, wanted %Lu\n",
 
2436                 dump_space_info(sinfo, num_bytes);
 
2443 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
 
2445         struct btrfs_block_group_cache *cache;
 
2447         cache = btrfs_lookup_block_group(root->fs_info, start);
 
2449                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
 
2452         btrfs_add_free_space(cache, start, len);
 
2453         update_reserved_extents(root, start, len, 0);
 
2457 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 
2458                                   struct btrfs_root *root,
 
2459                                   u64 num_bytes, u64 min_alloc_size,
 
2460                                   u64 empty_size, u64 hint_byte,
 
2461                                   u64 search_end, struct btrfs_key *ins,
 
2465         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 
2466                                      empty_size, hint_byte, search_end, ins,
 
2468         update_reserved_extents(root, ins->objectid, ins->offset, 1);
 
2472 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
 
2473                                          struct btrfs_root *root, u64 parent,
 
2474                                          u64 root_objectid, u64 ref_generation,
 
2475                                          u64 owner, struct btrfs_key *ins)
 
2481         u64 num_bytes = ins->offset;
 
2483         struct btrfs_fs_info *info = root->fs_info;
 
2484         struct btrfs_root *extent_root = info->extent_root;
 
2485         struct btrfs_extent_item *extent_item;
 
2486         struct btrfs_extent_ref *ref;
 
2487         struct btrfs_path *path;
 
2488         struct btrfs_key keys[2];
 
2491                 parent = ins->objectid;
 
2493         /* block accounting for super block */
 
2494         spin_lock_irq(&info->delalloc_lock);
 
2495         super_used = btrfs_super_bytes_used(&info->super_copy);
 
2496         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
 
2497         spin_unlock_irq(&info->delalloc_lock);
 
2499         /* block accounting for root item */
 
2500         root_used = btrfs_root_used(&root->root_item);
 
2501         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
 
2503         if (root == extent_root) {
 
2504                 struct pending_extent_op *extent_op;
 
2506                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
 
2509                 extent_op->type = PENDING_EXTENT_INSERT;
 
2510                 extent_op->bytenr = ins->objectid;
 
2511                 extent_op->num_bytes = ins->offset;
 
2512                 extent_op->parent = parent;
 
2513                 extent_op->orig_parent = 0;
 
2514                 extent_op->generation = ref_generation;
 
2515                 extent_op->orig_generation = 0;
 
2516                 extent_op->level = (int)owner;
 
2518                 mutex_lock(&root->fs_info->extent_ins_mutex);
 
2519                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
 
2520                                 ins->objectid + ins->offset - 1,
 
2521                                 EXTENT_WRITEBACK, GFP_NOFS);
 
2522                 set_state_private(&root->fs_info->extent_ins,
 
2523                                   ins->objectid, (unsigned long)extent_op);
 
2524                 mutex_unlock(&root->fs_info->extent_ins_mutex);
 
2528         memcpy(&keys[0], ins, sizeof(*ins));
 
2529         keys[1].objectid = ins->objectid;
 
2530         keys[1].type = BTRFS_EXTENT_REF_KEY;
 
2531         keys[1].offset = parent;
 
2532         sizes[0] = sizeof(*extent_item);
 
2533         sizes[1] = sizeof(*ref);
 
2535         path = btrfs_alloc_path();
 
2538         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
 
2542         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
 
2543                                      struct btrfs_extent_item);
 
2544         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
 
2545         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
 
2546                              struct btrfs_extent_ref);
 
2548         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
 
2549         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
 
2550         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
 
2551         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
 
2553         btrfs_mark_buffer_dirty(path->nodes[0]);
 
2555         trans->alloc_exclude_start = 0;
 
2556         trans->alloc_exclude_nr = 0;
 
2557         btrfs_free_path(path);
 
2558         finish_current_insert(trans, extent_root, 0);
 
2559         pending_ret = del_pending_extents(trans, extent_root, 0);
 
2569         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
 
2571                 printk("update block group failed for %Lu %Lu\n",
 
2572                        ins->objectid, ins->offset);
 
2579 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
 
2580                                 struct btrfs_root *root, u64 parent,
 
2581                                 u64 root_objectid, u64 ref_generation,
 
2582                                 u64 owner, struct btrfs_key *ins)
 
2586         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
 
2588         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
 
2589                                             ref_generation, owner, ins);
 
2590         update_reserved_extents(root, ins->objectid, ins->offset, 0);
 
2595  * this is used by the tree logging recovery code.  It records that
 
2596  * an extent has been allocated and makes sure to clear the free
 
2597  * space cache bits as well
 
2599 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
 
2600                                 struct btrfs_root *root, u64 parent,
 
2601                                 u64 root_objectid, u64 ref_generation,
 
2602                                 u64 owner, struct btrfs_key *ins)
 
2605         struct btrfs_block_group_cache *block_group;
 
2607         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
 
2608         mutex_lock(&block_group->alloc_mutex);
 
2609         cache_block_group(root, block_group);
 
2611         ret = btrfs_remove_free_space_lock(block_group, ins->objectid,
 
2613         mutex_unlock(&block_group->alloc_mutex);
 
2615         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
 
2616                                             ref_generation, owner, ins);
 
2621  * finds a free extent and does all the dirty work required for allocation
 
2622  * returns the key for the extent through ins, and a tree buffer for
 
2623  * the first block of the extent through buf.
 
2625  * returns 0 if everything worked, non-zero otherwise.
 
2627 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
 
2628                        struct btrfs_root *root,
 
2629                        u64 num_bytes, u64 parent, u64 min_alloc_size,
 
2630                        u64 root_objectid, u64 ref_generation,
 
2631                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
 
2632                        u64 search_end, struct btrfs_key *ins, u64 data)
 
2636         ret = __btrfs_reserve_extent(trans, root, num_bytes,
 
2637                                      min_alloc_size, empty_size, hint_byte,
 
2638                                      search_end, ins, data);
 
2640         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
 
2641                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
 
2642                                         root_objectid, ref_generation,
 
2643                                         owner_objectid, ins);
 
2647                 update_reserved_extents(root, ins->objectid, ins->offset, 1);
 
2652 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 
2653                                             struct btrfs_root *root,
 
2654                                             u64 bytenr, u32 blocksize)
 
2656         struct extent_buffer *buf;
 
2658         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
 
2660                 return ERR_PTR(-ENOMEM);
 
2661         btrfs_set_header_generation(buf, trans->transid);
 
2662         btrfs_tree_lock(buf);
 
2663         clean_tree_block(trans, root, buf);
 
2664         btrfs_set_buffer_uptodate(buf);
 
2665         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
 
2666                 set_extent_dirty(&root->dirty_log_pages, buf->start,
 
2667                          buf->start + buf->len - 1, GFP_NOFS);
 
2669                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 
2670                          buf->start + buf->len - 1, GFP_NOFS);
 
2672         trans->blocks_used++;
 
2677  * helper function to allocate a block for a given tree
 
2678  * returns the tree buffer or NULL.
 
2680 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 
2681                                              struct btrfs_root *root,
 
2682                                              u32 blocksize, u64 parent,
 
2689         struct btrfs_key ins;
 
2691         struct extent_buffer *buf;
 
2693         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
 
2694                                  root_objectid, ref_generation, level,
 
2695                                  empty_size, hint, (u64)-1, &ins, 0);
 
2698                 return ERR_PTR(ret);
 
2701         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
 
2705 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 
2706                         struct btrfs_root *root, struct extent_buffer *leaf)
 
2709         u64 leaf_generation;
 
2710         struct btrfs_key key;
 
2711         struct btrfs_file_extent_item *fi;
 
2716         BUG_ON(!btrfs_is_leaf(leaf));
 
2717         nritems = btrfs_header_nritems(leaf);
 
2718         leaf_owner = btrfs_header_owner(leaf);
 
2719         leaf_generation = btrfs_header_generation(leaf);
 
2721         for (i = 0; i < nritems; i++) {
 
2725                 btrfs_item_key_to_cpu(leaf, &key, i);
 
2726                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 
2728                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
 
2729                 if (btrfs_file_extent_type(leaf, fi) ==
 
2730                     BTRFS_FILE_EXTENT_INLINE)
 
2733                  * FIXME make sure to insert a trans record that
 
2734                  * repeats the snapshot del on crash
 
2736                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 
2737                 if (disk_bytenr == 0)
 
2740                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
 
2741                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
 
2742                                 leaf->start, leaf_owner, leaf_generation,
 
2746                 atomic_inc(&root->fs_info->throttle_gen);
 
2747                 wake_up(&root->fs_info->transaction_throttle);
 
2753 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
 
2754                                         struct btrfs_root *root,
 
2755                                         struct btrfs_leaf_ref *ref)
 
2759         struct btrfs_extent_info *info = ref->extents;
 
2761         for (i = 0; i < ref->nritems; i++) {
 
2762                 ret = __btrfs_free_extent(trans, root, info->bytenr,
 
2763                                           info->num_bytes, ref->bytenr,
 
2764                                           ref->owner, ref->generation,
 
2767                 atomic_inc(&root->fs_info->throttle_gen);
 
2768                 wake_up(&root->fs_info->transaction_throttle);
 
2778 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
 
2783         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
 
2786 #if 0 // some debugging code in case we see problems here
 
2787         /* if the refs count is one, it won't get increased again.  But
 
2788          * if the ref count is > 1, someone may be decreasing it at
 
2789          * the same time we are.
 
2792                 struct extent_buffer *eb = NULL;
 
2793                 eb = btrfs_find_create_tree_block(root, start, len);
 
2795                         btrfs_tree_lock(eb);
 
2797                 mutex_lock(&root->fs_info->alloc_mutex);
 
2798                 ret = lookup_extent_ref(NULL, root, start, len, refs);
 
2800                 mutex_unlock(&root->fs_info->alloc_mutex);
 
2803                         btrfs_tree_unlock(eb);
 
2804                         free_extent_buffer(eb);
 
2807                         printk("block %llu went down to one during drop_snap\n",
 
2808                                (unsigned long long)start);
 
2819  * helper function for drop_snapshot, this walks down the tree dropping ref
 
2820  * counts as it goes.
 
2822 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
 
2823                                    struct btrfs_root *root,
 
2824                                    struct btrfs_path *path, int *level)
 
2830         struct extent_buffer *next;
 
2831         struct extent_buffer *cur;
 
2832         struct extent_buffer *parent;
 
2833         struct btrfs_leaf_ref *ref;
 
2838         WARN_ON(*level < 0);
 
2839         WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
2840         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
 
2841                                 path->nodes[*level]->len, &refs);
 
2847          * walk down to the last node level and free all the leaves
 
2849         while(*level >= 0) {
 
2850                 WARN_ON(*level < 0);
 
2851                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
2852                 cur = path->nodes[*level];
 
2854                 if (btrfs_header_level(cur) != *level)
 
2857                 if (path->slots[*level] >=
 
2858                     btrfs_header_nritems(cur))
 
2861                         ret = btrfs_drop_leaf_ref(trans, root, cur);
 
2865                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
 
2866                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
 
2867                 blocksize = btrfs_level_size(root, *level - 1);
 
2869                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
 
2872                         parent = path->nodes[*level];
 
2873                         root_owner = btrfs_header_owner(parent);
 
2874                         root_gen = btrfs_header_generation(parent);
 
2875                         path->slots[*level]++;
 
2877                         ret = __btrfs_free_extent(trans, root, bytenr,
 
2878                                                 blocksize, parent->start,
 
2879                                                 root_owner, root_gen,
 
2883                         atomic_inc(&root->fs_info->throttle_gen);
 
2884                         wake_up(&root->fs_info->transaction_throttle);
 
2890                  * at this point, we have a single ref, and since the
 
2891                  * only place referencing this extent is a dead root
 
2892                  * the reference count should never go higher.
 
2893                  * So, we don't need to check it again
 
2896                         ref = btrfs_lookup_leaf_ref(root, bytenr);
 
2897                         if (ref && ref->generation != ptr_gen) {
 
2898                                 btrfs_free_leaf_ref(root, ref);
 
2902                                 ret = cache_drop_leaf_ref(trans, root, ref);
 
2904                                 btrfs_remove_leaf_ref(root, ref);
 
2905                                 btrfs_free_leaf_ref(root, ref);
 
2909                         if (printk_ratelimit()) {
 
2910                                 printk("leaf ref miss for bytenr %llu\n",
 
2911                                        (unsigned long long)bytenr);
 
2914                 next = btrfs_find_tree_block(root, bytenr, blocksize);
 
2915                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
 
2916                         free_extent_buffer(next);
 
2918                         next = read_tree_block(root, bytenr, blocksize,
 
2923                          * this is a debugging check and can go away
 
2924                          * the ref should never go all the way down to 1
 
2927                         ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
 
2933                 WARN_ON(*level <= 0);
 
2934                 if (path->nodes[*level-1])
 
2935                         free_extent_buffer(path->nodes[*level-1]);
 
2936                 path->nodes[*level-1] = next;
 
2937                 *level = btrfs_header_level(next);
 
2938                 path->slots[*level] = 0;
 
2942         WARN_ON(*level < 0);
 
2943         WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
2945         if (path->nodes[*level] == root->node) {
 
2946                 parent = path->nodes[*level];
 
2947                 bytenr = path->nodes[*level]->start;
 
2949                 parent = path->nodes[*level + 1];
 
2950                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
 
2953         blocksize = btrfs_level_size(root, *level);
 
2954         root_owner = btrfs_header_owner(parent);
 
2955         root_gen = btrfs_header_generation(parent);
 
2957         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
 
2958                                   parent->start, root_owner, root_gen,
 
2960         free_extent_buffer(path->nodes[*level]);
 
2961         path->nodes[*level] = NULL;
 
2970  * helper function for drop_subtree, this function is similar to
 
2971  * walk_down_tree. The main difference is that it checks reference
 
2972  * counts while tree blocks are locked.
 
2974 static int noinline walk_down_subtree(struct btrfs_trans_handle *trans,
 
2975                                       struct btrfs_root *root,
 
2976                                       struct btrfs_path *path, int *level)
 
2978         struct extent_buffer *next;
 
2979         struct extent_buffer *cur;
 
2980         struct extent_buffer *parent;
 
2987         cur = path->nodes[*level];
 
2988         ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
 
2994         while (*level >= 0) {
 
2995                 cur = path->nodes[*level];
 
2997                         ret = btrfs_drop_leaf_ref(trans, root, cur);
 
2999                         clean_tree_block(trans, root, cur);
 
3002                 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
 
3003                         clean_tree_block(trans, root, cur);
 
3007                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
 
3008                 blocksize = btrfs_level_size(root, *level - 1);
 
3009                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
 
3011                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
 
3012                 btrfs_tree_lock(next);
 
3014                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
 
3018                         parent = path->nodes[*level];
 
3019                         ret = btrfs_free_extent(trans, root, bytenr,
 
3020                                         blocksize, parent->start,
 
3021                                         btrfs_header_owner(parent),
 
3022                                         btrfs_header_generation(parent),
 
3025                         path->slots[*level]++;
 
3026                         btrfs_tree_unlock(next);
 
3027                         free_extent_buffer(next);
 
3031                 *level = btrfs_header_level(next);
 
3032                 path->nodes[*level] = next;
 
3033                 path->slots[*level] = 0;
 
3034                 path->locks[*level] = 1;
 
3038         parent = path->nodes[*level + 1];
 
3039         bytenr = path->nodes[*level]->start;
 
3040         blocksize = path->nodes[*level]->len;
 
3042         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
 
3043                         parent->start, btrfs_header_owner(parent),
 
3044                         btrfs_header_generation(parent), *level, 1);
 
3047         if (path->locks[*level]) {
 
3048                 btrfs_tree_unlock(path->nodes[*level]);
 
3049                 path->locks[*level] = 0;
 
3051         free_extent_buffer(path->nodes[*level]);
 
3052         path->nodes[*level] = NULL;
 
3059  * helper for dropping snapshots.  This walks back up the tree in the path
 
3060  * to find the first node higher up where we haven't yet gone through
 
3063 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
 
3064                                  struct btrfs_root *root,
 
3065                                  struct btrfs_path *path,
 
3066                                  int *level, int max_level)
 
3070         struct btrfs_root_item *root_item = &root->root_item;
 
3075         for (i = *level; i < max_level && path->nodes[i]; i++) {
 
3076                 slot = path->slots[i];
 
3077                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
 
3078                         struct extent_buffer *node;
 
3079                         struct btrfs_disk_key disk_key;
 
3080                         node = path->nodes[i];
 
3083                         WARN_ON(*level == 0);
 
3084                         btrfs_node_key(node, &disk_key, path->slots[i]);
 
3085                         memcpy(&root_item->drop_progress,
 
3086                                &disk_key, sizeof(disk_key));
 
3087                         root_item->drop_level = i;
 
3090                         struct extent_buffer *parent;
 
3091                         if (path->nodes[*level] == root->node)
 
3092                                 parent = path->nodes[*level];
 
3094                                 parent = path->nodes[*level + 1];
 
3096                         root_owner = btrfs_header_owner(parent);
 
3097                         root_gen = btrfs_header_generation(parent);
 
3099                         clean_tree_block(trans, root, path->nodes[*level]);
 
3100                         ret = btrfs_free_extent(trans, root,
 
3101                                                 path->nodes[*level]->start,
 
3102                                                 path->nodes[*level]->len,
 
3103                                                 parent->start, root_owner,
 
3104                                                 root_gen, *level, 1);
 
3106                         if (path->locks[*level]) {
 
3107                                 btrfs_tree_unlock(path->nodes[*level]);
 
3108                                 path->locks[*level] = 0;
 
3110                         free_extent_buffer(path->nodes[*level]);
 
3111                         path->nodes[*level] = NULL;
 
3119  * drop the reference count on the tree rooted at 'snap'.  This traverses
 
3120  * the tree freeing any blocks that have a ref count of zero after being
 
3123 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 
3129         struct btrfs_path *path;
 
3132         struct btrfs_root_item *root_item = &root->root_item;
 
3134         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
 
3135         path = btrfs_alloc_path();
 
3138         level = btrfs_header_level(root->node);
 
3140         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
 
3141                 path->nodes[level] = root->node;
 
3142                 extent_buffer_get(root->node);
 
3143                 path->slots[level] = 0;
 
3145                 struct btrfs_key key;
 
3146                 struct btrfs_disk_key found_key;
 
3147                 struct extent_buffer *node;
 
3149                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
 
3150                 level = root_item->drop_level;
 
3151                 path->lowest_level = level;
 
3152                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 
3157                 node = path->nodes[level];
 
3158                 btrfs_node_key(node, &found_key, path->slots[level]);
 
3159                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
 
3160                                sizeof(found_key)));
 
3162                  * unlock our path, this is safe because only this
 
3163                  * function is allowed to delete this snapshot
 
3165                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
 
3166                         if (path->nodes[i] && path->locks[i]) {
 
3168                                 btrfs_tree_unlock(path->nodes[i]);
 
3173                 wret = walk_down_tree(trans, root, path, &level);
 
3179                 wret = walk_up_tree(trans, root, path, &level,
 
3185                 if (trans->transaction->in_commit) {
 
3189                 atomic_inc(&root->fs_info->throttle_gen);
 
3190                 wake_up(&root->fs_info->transaction_throttle);
 
3192         for (i = 0; i <= orig_level; i++) {
 
3193                 if (path->nodes[i]) {
 
3194                         free_extent_buffer(path->nodes[i]);
 
3195                         path->nodes[i] = NULL;
 
3199         btrfs_free_path(path);
 
3203 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 
3204                         struct btrfs_root *root,
 
3205                         struct extent_buffer *node,
 
3206                         struct extent_buffer *parent)
 
3208         struct btrfs_path *path;
 
3214         path = btrfs_alloc_path();
 
3217         BUG_ON(!btrfs_tree_locked(parent));
 
3218         parent_level = btrfs_header_level(parent);
 
3219         extent_buffer_get(parent);
 
3220         path->nodes[parent_level] = parent;
 
3221         path->slots[parent_level] = btrfs_header_nritems(parent);
 
3223         BUG_ON(!btrfs_tree_locked(node));
 
3224         level = btrfs_header_level(node);
 
3225         extent_buffer_get(node);
 
3226         path->nodes[level] = node;
 
3227         path->slots[level] = 0;
 
3230                 wret = walk_down_subtree(trans, root, path, &level);
 
3236                 wret = walk_up_tree(trans, root, path, &level, parent_level);
 
3243         btrfs_free_path(path);
 
3247 static unsigned long calc_ra(unsigned long start, unsigned long last,
 
3250         return min(last, start + nr - 1);
 
3253 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
 
3258         unsigned long first_index;
 
3259         unsigned long last_index;
 
3262         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 
3263         struct file_ra_state *ra;
 
3264         struct btrfs_ordered_extent *ordered;
 
3265         unsigned int total_read = 0;
 
3266         unsigned int total_dirty = 0;
 
3269         ra = kzalloc(sizeof(*ra), GFP_NOFS);
 
3271         mutex_lock(&inode->i_mutex);
 
3272         first_index = start >> PAGE_CACHE_SHIFT;
 
3273         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 
3275         /* make sure the dirty trick played by the caller work */
 
3276         ret = invalidate_inode_pages2_range(inode->i_mapping,
 
3277                                             first_index, last_index);
 
3281         file_ra_state_init(ra, inode->i_mapping);
 
3283         for (i = first_index ; i <= last_index; i++) {
 
3284                 if (total_read % ra->ra_pages == 0) {
 
3285                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
 
3286                                        calc_ra(i, last_index, ra->ra_pages));
 
3290                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
 
3292                 page = grab_cache_page(inode->i_mapping, i);
 
3297                 if (!PageUptodate(page)) {
 
3298                         btrfs_readpage(NULL, page);
 
3300                         if (!PageUptodate(page)) {
 
3302                                 page_cache_release(page);
 
3307                 wait_on_page_writeback(page);
 
3309                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
 
3310                 page_end = page_start + PAGE_CACHE_SIZE - 1;
 
3311                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
3313                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
 
3315                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
3317                         page_cache_release(page);
 
3318                         btrfs_start_ordered_extent(inode, ordered, 1);
 
3319                         btrfs_put_ordered_extent(ordered);
 
3322                 set_page_extent_mapped(page);
 
3324                 btrfs_set_extent_delalloc(inode, page_start, page_end);
 
3325                 if (i == first_index)
 
3326                         set_extent_bits(io_tree, page_start, page_end,
 
3327                                         EXTENT_BOUNDARY, GFP_NOFS);
 
3329                 set_page_dirty(page);
 
3332                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
3334                 page_cache_release(page);
 
3339         mutex_unlock(&inode->i_mutex);
 
3340         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
 
3344 static int noinline relocate_data_extent(struct inode *reloc_inode,
 
3345                                          struct btrfs_key *extent_key,
 
3348         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
 
3349         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
 
3350         struct extent_map *em;
 
3351         u64 start = extent_key->objectid - offset;
 
3352         u64 end = start + extent_key->offset - 1;
 
3354         em = alloc_extent_map(GFP_NOFS);
 
3355         BUG_ON(!em || IS_ERR(em));
 
3358         em->len = extent_key->offset;
 
3359         em->block_len = extent_key->offset;
 
3360         em->block_start = extent_key->objectid;
 
3361         em->bdev = root->fs_info->fs_devices->latest_bdev;
 
3362         set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
3364         /* setup extent map to cheat btrfs_readpage */
 
3365         lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
 
3368                 spin_lock(&em_tree->lock);
 
3369                 ret = add_extent_mapping(em_tree, em);
 
3370                 spin_unlock(&em_tree->lock);
 
3371                 if (ret != -EEXIST) {
 
3372                         free_extent_map(em);
 
3375                 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
 
3377         unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
 
3379         return relocate_inode_pages(reloc_inode, start, extent_key->offset);
 
3382 struct btrfs_ref_path {
 
3384         u64 nodes[BTRFS_MAX_LEVEL];
 
3386         u64 root_generation;
 
3393         struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
 
3394         u64 new_nodes[BTRFS_MAX_LEVEL];
 
3397 struct disk_extent {
 
3408 static int is_cowonly_root(u64 root_objectid)
 
3410         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
 
3411             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
 
3412             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
 
3413             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
 
3414             root_objectid == BTRFS_TREE_LOG_OBJECTID)
 
3419 static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
 
3420                                     struct btrfs_root *extent_root,
 
3421                                     struct btrfs_ref_path *ref_path,
 
3424         struct extent_buffer *leaf;
 
3425         struct btrfs_path *path;
 
3426         struct btrfs_extent_ref *ref;
 
3427         struct btrfs_key key;
 
3428         struct btrfs_key found_key;
 
3434         path = btrfs_alloc_path();
 
3439                 ref_path->lowest_level = -1;
 
3440                 ref_path->current_level = -1;
 
3441                 ref_path->shared_level = -1;
 
3445         level = ref_path->current_level - 1;
 
3446         while (level >= -1) {
 
3448                 if (level < ref_path->lowest_level)
 
3452                         bytenr = ref_path->nodes[level];
 
3454                         bytenr = ref_path->extent_start;
 
3456                 BUG_ON(bytenr == 0);
 
3458                 parent = ref_path->nodes[level + 1];
 
3459                 ref_path->nodes[level + 1] = 0;
 
3460                 ref_path->current_level = level;
 
3461                 BUG_ON(parent == 0);
 
3463                 key.objectid = bytenr;
 
3464                 key.offset = parent + 1;
 
3465                 key.type = BTRFS_EXTENT_REF_KEY;
 
3467                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
 
3472                 leaf = path->nodes[0];
 
3473                 nritems = btrfs_header_nritems(leaf);
 
3474                 if (path->slots[0] >= nritems) {
 
3475                         ret = btrfs_next_leaf(extent_root, path);
 
3480                         leaf = path->nodes[0];
 
3483                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
3484                 if (found_key.objectid == bytenr &&
 
3485                     found_key.type == BTRFS_EXTENT_REF_KEY) {
 
3486                         if (level < ref_path->shared_level)
 
3487                                 ref_path->shared_level = level;
 
3492                 btrfs_release_path(extent_root, path);
 
3495         /* reached lowest level */
 
3499         level = ref_path->current_level;
 
3500         while (level < BTRFS_MAX_LEVEL - 1) {
 
3503                         bytenr = ref_path->nodes[level];
 
3505                         bytenr = ref_path->extent_start;
 
3507                 BUG_ON(bytenr == 0);
 
3509                 key.objectid = bytenr;
 
3511                 key.type = BTRFS_EXTENT_REF_KEY;
 
3513                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
 
3517                 leaf = path->nodes[0];
 
3518                 nritems = btrfs_header_nritems(leaf);
 
3519                 if (path->slots[0] >= nritems) {
 
3520                         ret = btrfs_next_leaf(extent_root, path);
 
3524                                 /* the extent was freed by someone */
 
3525                                 if (ref_path->lowest_level == level)
 
3527                                 btrfs_release_path(extent_root, path);
 
3530                         leaf = path->nodes[0];
 
3533                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
3534                 if (found_key.objectid != bytenr ||
 
3535                                 found_key.type != BTRFS_EXTENT_REF_KEY) {
 
3536                         /* the extent was freed by someone */
 
3537                         if (ref_path->lowest_level == level) {
 
3541                         btrfs_release_path(extent_root, path);
 
3545                 ref = btrfs_item_ptr(leaf, path->slots[0],
 
3546                                 struct btrfs_extent_ref);
 
3547                 ref_objectid = btrfs_ref_objectid(leaf, ref);
 
3548                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 
3550                                 level = (int)ref_objectid;
 
3551                                 BUG_ON(level >= BTRFS_MAX_LEVEL);
 
3552                                 ref_path->lowest_level = level;
 
3553                                 ref_path->current_level = level;
 
3554                                 ref_path->nodes[level] = bytenr;
 
3556                                 WARN_ON(ref_objectid != level);
 
3559                         WARN_ON(level != -1);
 
3563                 if (ref_path->lowest_level == level) {
 
3564                         ref_path->owner_objectid = ref_objectid;
 
3565                         ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
 
3569                  * the block is tree root or the block isn't in reference
 
3572                 if (found_key.objectid == found_key.offset ||
 
3573                     is_cowonly_root(btrfs_ref_root(leaf, ref))) {
 
3574                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
 
3575                         ref_path->root_generation =
 
3576                                 btrfs_ref_generation(leaf, ref);
 
3578                                 /* special reference from the tree log */
 
3579                                 ref_path->nodes[0] = found_key.offset;
 
3580                                 ref_path->current_level = 0;
 
3587                 BUG_ON(ref_path->nodes[level] != 0);
 
3588                 ref_path->nodes[level] = found_key.offset;
 
3589                 ref_path->current_level = level;
 
3592                  * the reference was created in the running transaction,
 
3593                  * no need to continue walking up.
 
3595                 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
 
3596                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
 
3597                         ref_path->root_generation =
 
3598                                 btrfs_ref_generation(leaf, ref);
 
3603                 btrfs_release_path(extent_root, path);
 
3606         /* reached max tree level, but no tree root found. */
 
3609         btrfs_free_path(path);
 
3613 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
 
3614                                 struct btrfs_root *extent_root,
 
3615                                 struct btrfs_ref_path *ref_path,
 
3618         memset(ref_path, 0, sizeof(*ref_path));
 
3619         ref_path->extent_start = extent_start;
 
3621         return __next_ref_path(trans, extent_root, ref_path, 1);
 
3624 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
 
3625                                struct btrfs_root *extent_root,
 
3626                                struct btrfs_ref_path *ref_path)
 
3628         return __next_ref_path(trans, extent_root, ref_path, 0);
 
3631 static int noinline get_new_locations(struct inode *reloc_inode,
 
3632                                       struct btrfs_key *extent_key,
 
3633                                       u64 offset, int no_fragment,
 
3634                                       struct disk_extent **extents,
 
3637         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
 
3638         struct btrfs_path *path;
 
3639         struct btrfs_file_extent_item *fi;
 
3640         struct extent_buffer *leaf;
 
3641         struct disk_extent *exts = *extents;
 
3642         struct btrfs_key found_key;
 
3647         int max = *nr_extents;
 
3650         WARN_ON(!no_fragment && *extents);
 
3653                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
 
3658         path = btrfs_alloc_path();
 
3661         cur_pos = extent_key->objectid - offset;
 
3662         last_byte = extent_key->objectid + extent_key->offset;
 
3663         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
 
3673                 leaf = path->nodes[0];
 
3674                 nritems = btrfs_header_nritems(leaf);
 
3675                 if (path->slots[0] >= nritems) {
 
3676                         ret = btrfs_next_leaf(root, path);
 
3681                         leaf = path->nodes[0];
 
3684                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
3685                 if (found_key.offset != cur_pos ||
 
3686                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
 
3687                     found_key.objectid != reloc_inode->i_ino)
 
3690                 fi = btrfs_item_ptr(leaf, path->slots[0],
 
3691                                     struct btrfs_file_extent_item);
 
3692                 if (btrfs_file_extent_type(leaf, fi) !=
 
3693                     BTRFS_FILE_EXTENT_REG ||
 
3694                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
 
3698                         struct disk_extent *old = exts;
 
3700                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
 
3701                         memcpy(exts, old, sizeof(*exts) * nr);
 
3702                         if (old != *extents)
 
3706                 exts[nr].disk_bytenr =
 
3707                         btrfs_file_extent_disk_bytenr(leaf, fi);
 
3708                 exts[nr].disk_num_bytes =
 
3709                         btrfs_file_extent_disk_num_bytes(leaf, fi);
 
3710                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
 
3711                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 
3712                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
 
3713                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
 
3714                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
 
3715                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
 
3717                 BUG_ON(exts[nr].offset > 0);
 
3718                 BUG_ON(exts[nr].compression || exts[nr].encryption);
 
3719                 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
 
3721                 cur_pos += exts[nr].num_bytes;
 
3724                 if (cur_pos + offset >= last_byte)
 
3734         WARN_ON(cur_pos + offset > last_byte);
 
3735         if (cur_pos + offset < last_byte) {
 
3741         btrfs_free_path(path);
 
3743                 if (exts != *extents)
 
3752 static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
 
3753                                         struct btrfs_root *root,
 
3754                                         struct btrfs_path *path,
 
3755                                         struct btrfs_key *extent_key,
 
3756                                         struct btrfs_key *leaf_key,
 
3757                                         struct btrfs_ref_path *ref_path,
 
3758                                         struct disk_extent *new_extents,
 
3761         struct extent_buffer *leaf;
 
3762         struct btrfs_file_extent_item *fi;
 
3763         struct inode *inode = NULL;
 
3764         struct btrfs_key key;
 
3772         int extent_locked = 0;
 
3776         memcpy(&key, leaf_key, sizeof(key));
 
3777         first_pos = INT_LIMIT(loff_t) - extent_key->offset;
 
3778         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
 
3779                 if (key.objectid < ref_path->owner_objectid ||
 
3780                     (key.objectid == ref_path->owner_objectid &&
 
3781                      key.type < BTRFS_EXTENT_DATA_KEY)) {
 
3782                         key.objectid = ref_path->owner_objectid;
 
3783                         key.type = BTRFS_EXTENT_DATA_KEY;
 
3789                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
3793                 leaf = path->nodes[0];
 
3794                 nritems = btrfs_header_nritems(leaf);
 
3796                 if (extent_locked && ret > 0) {
 
3798                          * the file extent item was modified by someone
 
3799                          * before the extent got locked.
 
3801                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 
3802                                       lock_end, GFP_NOFS);
 
3806                 if (path->slots[0] >= nritems) {
 
3807                         if (++nr_scaned > 2)
 
3810                         BUG_ON(extent_locked);
 
3811                         ret = btrfs_next_leaf(root, path);
 
3816                         leaf = path->nodes[0];
 
3817                         nritems = btrfs_header_nritems(leaf);
 
3820                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
3822                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
 
3823                         if ((key.objectid > ref_path->owner_objectid) ||
 
3824                             (key.objectid == ref_path->owner_objectid &&
 
3825                              key.type > BTRFS_EXTENT_DATA_KEY) ||
 
3826                             (key.offset >= first_pos + extent_key->offset))
 
3830                 if (inode && key.objectid != inode->i_ino) {
 
3831                         BUG_ON(extent_locked);
 
3832                         btrfs_release_path(root, path);
 
3833                         mutex_unlock(&inode->i_mutex);
 
3839                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
 
3844                 fi = btrfs_item_ptr(leaf, path->slots[0],
 
3845                                     struct btrfs_file_extent_item);
 
3846                 extent_type = btrfs_file_extent_type(leaf, fi);
 
3847                 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
 
3848                      extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
 
3849                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
 
3850                      extent_key->objectid)) {
 
3856                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 
3857                 ext_offset = btrfs_file_extent_offset(leaf, fi);
 
3859                 if (first_pos > key.offset - ext_offset)
 
3860                         first_pos = key.offset - ext_offset;
 
3862                 if (!extent_locked) {
 
3863                         lock_start = key.offset;
 
3864                         lock_end = lock_start + num_bytes - 1;
 
3866                         if (lock_start > key.offset ||
 
3867                             lock_end + 1 < key.offset + num_bytes) {
 
3868                                 unlock_extent(&BTRFS_I(inode)->io_tree,
 
3869                                               lock_start, lock_end, GFP_NOFS);
 
3875                         btrfs_release_path(root, path);
 
3877                         inode = btrfs_iget_locked(root->fs_info->sb,
 
3878                                                   key.objectid, root);
 
3879                         if (inode->i_state & I_NEW) {
 
3880                                 BTRFS_I(inode)->root = root;
 
3881                                 BTRFS_I(inode)->location.objectid =
 
3883                                 BTRFS_I(inode)->location.type =
 
3884                                         BTRFS_INODE_ITEM_KEY;
 
3885                                 BTRFS_I(inode)->location.offset = 0;
 
3886                                 btrfs_read_locked_inode(inode);
 
3887                                 unlock_new_inode(inode);
 
3890                          * some code call btrfs_commit_transaction while
 
3891                          * holding the i_mutex, so we can't use mutex_lock
 
3894                         if (is_bad_inode(inode) ||
 
3895                             !mutex_trylock(&inode->i_mutex)) {
 
3898                                 key.offset = (u64)-1;
 
3903                 if (!extent_locked) {
 
3904                         struct btrfs_ordered_extent *ordered;
 
3906                         btrfs_release_path(root, path);
 
3908                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 
3909                                     lock_end, GFP_NOFS);
 
3910                         ordered = btrfs_lookup_first_ordered_extent(inode,
 
3913                             ordered->file_offset <= lock_end &&
 
3914                             ordered->file_offset + ordered->len > lock_start) {
 
3915                                 unlock_extent(&BTRFS_I(inode)->io_tree,
 
3916                                               lock_start, lock_end, GFP_NOFS);
 
3917                                 btrfs_start_ordered_extent(inode, ordered, 1);
 
3918                                 btrfs_put_ordered_extent(ordered);
 
3919                                 key.offset += num_bytes;
 
3923                                 btrfs_put_ordered_extent(ordered);
 
3929                 if (nr_extents == 1) {
 
3930                         /* update extent pointer in place */
 
3931                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
 
3932                                                 new_extents[0].disk_bytenr);
 
3933                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 
3934                                                 new_extents[0].disk_num_bytes);
 
3935                         btrfs_mark_buffer_dirty(leaf);
 
3937                         btrfs_drop_extent_cache(inode, key.offset,
 
3938                                                 key.offset + num_bytes - 1, 0);
 
3940                         ret = btrfs_inc_extent_ref(trans, root,
 
3941                                                 new_extents[0].disk_bytenr,
 
3942                                                 new_extents[0].disk_num_bytes,
 
3944                                                 root->root_key.objectid,
 
3949                         ret = btrfs_free_extent(trans, root,
 
3950                                                 extent_key->objectid,
 
3953                                                 btrfs_header_owner(leaf),
 
3954                                                 btrfs_header_generation(leaf),
 
3958                         btrfs_release_path(root, path);
 
3959                         key.offset += num_bytes;
 
3967                          * drop old extent pointer at first, then insert the
 
3968                          * new pointers one bye one
 
3970                         btrfs_release_path(root, path);
 
3971                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
 
3972                                                  key.offset + num_bytes,
 
3973                                                  key.offset, &alloc_hint);
 
3976                         for (i = 0; i < nr_extents; i++) {
 
3977                                 if (ext_offset >= new_extents[i].num_bytes) {
 
3978                                         ext_offset -= new_extents[i].num_bytes;
 
3981                                 extent_len = min(new_extents[i].num_bytes -
 
3982                                                  ext_offset, num_bytes);
 
3984                                 ret = btrfs_insert_empty_item(trans, root,
 
3989                                 leaf = path->nodes[0];
 
3990                                 fi = btrfs_item_ptr(leaf, path->slots[0],
 
3991                                                 struct btrfs_file_extent_item);
 
3992                                 btrfs_set_file_extent_generation(leaf, fi,
 
3994                                 btrfs_set_file_extent_type(leaf, fi,
 
3995                                                         BTRFS_FILE_EXTENT_REG);
 
3996                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
 
3997                                                 new_extents[i].disk_bytenr);
 
3998                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 
3999                                                 new_extents[i].disk_num_bytes);
 
4000                                 btrfs_set_file_extent_ram_bytes(leaf, fi,
 
4001                                                 new_extents[i].ram_bytes);
 
4003                                 btrfs_set_file_extent_compression(leaf, fi,
 
4004                                                 new_extents[i].compression);
 
4005                                 btrfs_set_file_extent_encryption(leaf, fi,
 
4006                                                 new_extents[i].encryption);
 
4007                                 btrfs_set_file_extent_other_encoding(leaf, fi,
 
4008                                                 new_extents[i].other_encoding);
 
4010                                 btrfs_set_file_extent_num_bytes(leaf, fi,
 
4012                                 ext_offset += new_extents[i].offset;
 
4013                                 btrfs_set_file_extent_offset(leaf, fi,
 
4015                                 btrfs_mark_buffer_dirty(leaf);
 
4017                                 btrfs_drop_extent_cache(inode, key.offset,
 
4018                                                 key.offset + extent_len - 1, 0);
 
4020                                 ret = btrfs_inc_extent_ref(trans, root,
 
4021                                                 new_extents[i].disk_bytenr,
 
4022                                                 new_extents[i].disk_num_bytes,
 
4024                                                 root->root_key.objectid,
 
4025                                                 trans->transid, key.objectid);
 
4027                                 btrfs_release_path(root, path);
 
4029                                 inode_add_bytes(inode, extent_len);
 
4032                                 num_bytes -= extent_len;
 
4033                                 key.offset += extent_len;
 
4038                         BUG_ON(i >= nr_extents);
 
4042                 if (extent_locked) {
 
4043                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 
4044                                       lock_end, GFP_NOFS);
 
4048                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
 
4049                     key.offset >= first_pos + extent_key->offset)
 
4056         btrfs_release_path(root, path);
 
4058                 mutex_unlock(&inode->i_mutex);
 
4059                 if (extent_locked) {
 
4060                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 
4061                                       lock_end, GFP_NOFS);
 
4068 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
 
4069                                struct btrfs_root *root,
 
4070                                struct extent_buffer *buf, u64 orig_start)
 
4075         BUG_ON(btrfs_header_generation(buf) != trans->transid);
 
4076         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
 
4078         level = btrfs_header_level(buf);
 
4080                 struct btrfs_leaf_ref *ref;
 
4081                 struct btrfs_leaf_ref *orig_ref;
 
4083                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
 
4087                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
 
4089                         btrfs_free_leaf_ref(root, orig_ref);
 
4093                 ref->nritems = orig_ref->nritems;
 
4094                 memcpy(ref->extents, orig_ref->extents,
 
4095                         sizeof(ref->extents[0]) * ref->nritems);
 
4097                 btrfs_free_leaf_ref(root, orig_ref);
 
4099                 ref->root_gen = trans->transid;
 
4100                 ref->bytenr = buf->start;
 
4101                 ref->owner = btrfs_header_owner(buf);
 
4102                 ref->generation = btrfs_header_generation(buf);
 
4103                 ret = btrfs_add_leaf_ref(root, ref, 0);
 
4105                 btrfs_free_leaf_ref(root, ref);
 
4110 static int noinline invalidate_extent_cache(struct btrfs_root *root,
 
4111                                         struct extent_buffer *leaf,
 
4112                                         struct btrfs_block_group_cache *group,
 
4113                                         struct btrfs_root *target_root)
 
4115         struct btrfs_key key;
 
4116         struct inode *inode = NULL;
 
4117         struct btrfs_file_extent_item *fi;
 
4119         u64 skip_objectid = 0;
 
4123         nritems = btrfs_header_nritems(leaf);
 
4124         for (i = 0; i < nritems; i++) {
 
4125                 btrfs_item_key_to_cpu(leaf, &key, i);
 
4126                 if (key.objectid == skip_objectid ||
 
4127                     key.type != BTRFS_EXTENT_DATA_KEY)
 
4129                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
 
4130                 if (btrfs_file_extent_type(leaf, fi) ==
 
4131                     BTRFS_FILE_EXTENT_INLINE)
 
4133                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
 
4135                 if (!inode || inode->i_ino != key.objectid) {
 
4137                         inode = btrfs_ilookup(target_root->fs_info->sb,
 
4138                                               key.objectid, target_root, 1);
 
4141                         skip_objectid = key.objectid;
 
4144                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 
4146                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
 
4147                             key.offset + num_bytes - 1, GFP_NOFS);
 
4148                 btrfs_drop_extent_cache(inode, key.offset,
 
4149                                         key.offset + num_bytes - 1, 1);
 
4150                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
 
4151                               key.offset + num_bytes - 1, GFP_NOFS);
 
4158 static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
 
4159                                         struct btrfs_root *root,
 
4160                                         struct extent_buffer *leaf,
 
4161                                         struct btrfs_block_group_cache *group,
 
4162                                         struct inode *reloc_inode)
 
4164         struct btrfs_key key;
 
4165         struct btrfs_key extent_key;
 
4166         struct btrfs_file_extent_item *fi;
 
4167         struct btrfs_leaf_ref *ref;
 
4168         struct disk_extent *new_extent;
 
4177         new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
 
4178         BUG_ON(!new_extent);
 
4180         ref = btrfs_lookup_leaf_ref(root, leaf->start);
 
4184         nritems = btrfs_header_nritems(leaf);
 
4185         for (i = 0; i < nritems; i++) {
 
4186                 btrfs_item_key_to_cpu(leaf, &key, i);
 
4187                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 
4189                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
 
4190                 if (btrfs_file_extent_type(leaf, fi) ==
 
4191                     BTRFS_FILE_EXTENT_INLINE)
 
4193                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 
4194                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
 
4199                 if (bytenr >= group->key.objectid + group->key.offset ||
 
4200                     bytenr + num_bytes <= group->key.objectid)
 
4203                 extent_key.objectid = bytenr;
 
4204                 extent_key.offset = num_bytes;
 
4205                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
 
4207                 ret = get_new_locations(reloc_inode, &extent_key,
 
4208                                         group->key.objectid, 1,
 
4209                                         &new_extent, &nr_extent);
 
4214                 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
 
4215                 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
 
4216                 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
 
4217                 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
 
4219                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
 
4220                                                 new_extent->disk_bytenr);
 
4221                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 
4222                                                 new_extent->disk_num_bytes);
 
4223                 btrfs_mark_buffer_dirty(leaf);
 
4225                 ret = btrfs_inc_extent_ref(trans, root,
 
4226                                         new_extent->disk_bytenr,
 
4227                                         new_extent->disk_num_bytes,
 
4229                                         root->root_key.objectid,
 
4230                                         trans->transid, key.objectid);
 
4232                 ret = btrfs_free_extent(trans, root,
 
4233                                         bytenr, num_bytes, leaf->start,
 
4234                                         btrfs_header_owner(leaf),
 
4235                                         btrfs_header_generation(leaf),
 
4241         BUG_ON(ext_index + 1 != ref->nritems);
 
4242         btrfs_free_leaf_ref(root, ref);
 
4246 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
 
4247                           struct btrfs_root *root)
 
4249         struct btrfs_root *reloc_root;
 
4252         if (root->reloc_root) {
 
4253                 reloc_root = root->reloc_root;
 
4254                 root->reloc_root = NULL;
 
4255                 list_add(&reloc_root->dead_list,
 
4256                          &root->fs_info->dead_reloc_roots);
 
4258                 btrfs_set_root_bytenr(&reloc_root->root_item,
 
4259                                       reloc_root->node->start);
 
4260                 btrfs_set_root_level(&root->root_item,
 
4261                                      btrfs_header_level(reloc_root->node));
 
4262                 memset(&reloc_root->root_item.drop_progress, 0,
 
4263                         sizeof(struct btrfs_disk_key));
 
4264                 reloc_root->root_item.drop_level = 0;
 
4266                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
 
4267                                         &reloc_root->root_key,
 
4268                                         &reloc_root->root_item);
 
4274 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
 
4276         struct btrfs_trans_handle *trans;
 
4277         struct btrfs_root *reloc_root;
 
4278         struct btrfs_root *prev_root = NULL;
 
4279         struct list_head dead_roots;
 
4283         INIT_LIST_HEAD(&dead_roots);
 
4284         list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
 
4286         while (!list_empty(&dead_roots)) {
 
4287                 reloc_root = list_entry(dead_roots.prev,
 
4288                                         struct btrfs_root, dead_list);
 
4289                 list_del_init(&reloc_root->dead_list);
 
4291                 BUG_ON(reloc_root->commit_root != NULL);
 
4293                         trans = btrfs_join_transaction(root, 1);
 
4296                         mutex_lock(&root->fs_info->drop_mutex);
 
4297                         ret = btrfs_drop_snapshot(trans, reloc_root);
 
4300                         mutex_unlock(&root->fs_info->drop_mutex);
 
4302                         nr = trans->blocks_used;
 
4303                         ret = btrfs_end_transaction(trans, root);
 
4305                         btrfs_btree_balance_dirty(root, nr);
 
4308                 free_extent_buffer(reloc_root->node);
 
4310                 ret = btrfs_del_root(trans, root->fs_info->tree_root,
 
4311                                      &reloc_root->root_key);
 
4313                 mutex_unlock(&root->fs_info->drop_mutex);
 
4315                 nr = trans->blocks_used;
 
4316                 ret = btrfs_end_transaction(trans, root);
 
4318                 btrfs_btree_balance_dirty(root, nr);
 
4321                 prev_root = reloc_root;
 
4324                 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
 
4330 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
 
4332         list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
 
4336 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
 
4338         struct btrfs_root *reloc_root;
 
4339         struct btrfs_trans_handle *trans;
 
4340         struct btrfs_key location;
 
4344         mutex_lock(&root->fs_info->tree_reloc_mutex);
 
4345         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
 
4347         found = !list_empty(&root->fs_info->dead_reloc_roots);
 
4348         mutex_unlock(&root->fs_info->tree_reloc_mutex);
 
4351                 trans = btrfs_start_transaction(root, 1);
 
4353                 ret = btrfs_commit_transaction(trans, root);
 
4357         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
 
4358         location.offset = (u64)-1;
 
4359         location.type = BTRFS_ROOT_ITEM_KEY;
 
4361         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
 
4362         BUG_ON(!reloc_root);
 
4363         btrfs_orphan_cleanup(reloc_root);
 
4367 static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
 
4368                                     struct btrfs_root *root)
 
4370         struct btrfs_root *reloc_root;
 
4371         struct extent_buffer *eb;
 
4372         struct btrfs_root_item *root_item;
 
4373         struct btrfs_key root_key;
 
4376         BUG_ON(!root->ref_cows);
 
4377         if (root->reloc_root)
 
4380         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
 
4383         ret = btrfs_copy_root(trans, root, root->commit_root,
 
4384                               &eb, BTRFS_TREE_RELOC_OBJECTID);
 
4387         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
 
4388         root_key.offset = root->root_key.objectid;
 
4389         root_key.type = BTRFS_ROOT_ITEM_KEY;
 
4391         memcpy(root_item, &root->root_item, sizeof(root_item));
 
4392         btrfs_set_root_refs(root_item, 0);
 
4393         btrfs_set_root_bytenr(root_item, eb->start);
 
4394         btrfs_set_root_level(root_item, btrfs_header_level(eb));
 
4395         btrfs_set_root_generation(root_item, trans->transid);
 
4397         btrfs_tree_unlock(eb);
 
4398         free_extent_buffer(eb);
 
4400         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
 
4401                                 &root_key, root_item);
 
4405         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
 
4407         BUG_ON(!reloc_root);
 
4408         reloc_root->last_trans = trans->transid;
 
4409         reloc_root->commit_root = NULL;
 
4410         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
 
4412         root->reloc_root = reloc_root;
 
4417  * Core function of space balance.
 
4419  * The idea is using reloc trees to relocate tree blocks in reference
 
4420  * counted roots. There is one reloc tree for each subvol, and all
 
4421  * reloc trees share same root key objectid. Reloc trees are snapshots
 
4422  * of the latest committed roots of subvols (root->commit_root).
 
4424  * To relocate a tree block referenced by a subvol, there are two steps.
 
4425  * COW the block through subvol's reloc tree, then update block pointer
 
4426  * in the subvol to point to the new block. Since all reloc trees share
 
4427  * same root key objectid, doing special handing for tree blocks owned
 
4428  * by them is easy. Once a tree block has been COWed in one reloc tree,
 
4429  * we can use the resulting new block directly when the same block is
 
4430  * required to COW again through other reloc trees. By this way, relocated
 
4431  * tree blocks are shared between reloc trees, so they are also shared
 
4434 static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
 
4435                                       struct btrfs_root *root,
 
4436                                       struct btrfs_path *path,
 
4437                                       struct btrfs_key *first_key,
 
4438                                       struct btrfs_ref_path *ref_path,
 
4439                                       struct btrfs_block_group_cache *group,
 
4440                                       struct inode *reloc_inode)
 
4442         struct btrfs_root *reloc_root;
 
4443         struct extent_buffer *eb = NULL;
 
4444         struct btrfs_key *keys;
 
4448         int lowest_level = 0;
 
4451         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
 
4452                 lowest_level = ref_path->owner_objectid;
 
4454         if (!root->ref_cows) {
 
4455                 path->lowest_level = lowest_level;
 
4456                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
 
4458                 path->lowest_level = 0;
 
4459                 btrfs_release_path(root, path);
 
4463         mutex_lock(&root->fs_info->tree_reloc_mutex);
 
4464         ret = init_reloc_tree(trans, root);
 
4466         reloc_root = root->reloc_root;
 
4468         shared_level = ref_path->shared_level;
 
4469         ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
 
4471         keys = ref_path->node_keys;
 
4472         nodes = ref_path->new_nodes;
 
4473         memset(&keys[shared_level + 1], 0,
 
4474                sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
 
4475         memset(&nodes[shared_level + 1], 0,
 
4476                sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
 
4478         if (nodes[lowest_level] == 0) {
 
4479                 path->lowest_level = lowest_level;
 
4480                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
 
4483                 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
 
4484                         eb = path->nodes[level];
 
4485                         if (!eb || eb == reloc_root->node)
 
4487                         nodes[level] = eb->start;
 
4489                                 btrfs_item_key_to_cpu(eb, &keys[level], 0);
 
4491                                 btrfs_node_key_to_cpu(eb, &keys[level], 0);
 
4493                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 
4494                         eb = path->nodes[0];
 
4495                         ret = replace_extents_in_leaf(trans, reloc_root, eb,
 
4496                                                       group, reloc_inode);
 
4499                 btrfs_release_path(reloc_root, path);
 
4501                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
 
4507          * replace tree blocks in the fs tree with tree blocks in
 
4510         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
 
4513         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 
4514                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
 
4517                 extent_buffer_get(path->nodes[0]);
 
4518                 eb = path->nodes[0];
 
4519                 btrfs_release_path(reloc_root, path);
 
4520                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
 
4522                 free_extent_buffer(eb);
 
4525         mutex_unlock(&root->fs_info->tree_reloc_mutex);
 
4526         path->lowest_level = 0;
 
4530 static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
 
4531                                         struct btrfs_root *root,
 
4532                                         struct btrfs_path *path,
 
4533                                         struct btrfs_key *first_key,
 
4534                                         struct btrfs_ref_path *ref_path)
 
4538         ret = relocate_one_path(trans, root, path, first_key,
 
4539                                 ref_path, NULL, NULL);
 
4542         if (root == root->fs_info->extent_root)
 
4543                 btrfs_extent_post_op(trans, root);
 
4548 static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
 
4549                                     struct btrfs_root *extent_root,
 
4550                                     struct btrfs_path *path,
 
4551                                     struct btrfs_key *extent_key)
 
4555         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
 
4558         ret = btrfs_del_item(trans, extent_root, path);
 
4560         btrfs_release_path(extent_root, path);
 
4564 static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
 
4565                                                 struct btrfs_ref_path *ref_path)
 
4567         struct btrfs_key root_key;
 
4569         root_key.objectid = ref_path->root_objectid;
 
4570         root_key.type = BTRFS_ROOT_ITEM_KEY;
 
4571         if (is_cowonly_root(ref_path->root_objectid))
 
4572                 root_key.offset = 0;
 
4574                 root_key.offset = (u64)-1;
 
4576         return btrfs_read_fs_root_no_name(fs_info, &root_key);
 
4579 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
 
4580                                         struct btrfs_path *path,
 
4581                                         struct btrfs_key *extent_key,
 
4582                                         struct btrfs_block_group_cache *group,
 
4583                                         struct inode *reloc_inode, int pass)
 
4585         struct btrfs_trans_handle *trans;
 
4586         struct btrfs_root *found_root;
 
4587         struct btrfs_ref_path *ref_path = NULL;
 
4588         struct disk_extent *new_extents = NULL;
 
4593         struct btrfs_key first_key;
 
4597         trans = btrfs_start_transaction(extent_root, 1);
 
4600         if (extent_key->objectid == 0) {
 
4601                 ret = del_extent_zero(trans, extent_root, path, extent_key);
 
4605         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
 
4611         for (loops = 0; ; loops++) {
 
4613                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
 
4614                                                    extent_key->objectid);
 
4616                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
 
4623                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 
4624                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
 
4627                 found_root = read_ref_root(extent_root->fs_info, ref_path);
 
4628                 BUG_ON(!found_root);
 
4630                  * for reference counted tree, only process reference paths
 
4631                  * rooted at the latest committed root.
 
4633                 if (found_root->ref_cows &&
 
4634                     ref_path->root_generation != found_root->root_key.offset)
 
4637                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 
4640                                  * copy data extents to new locations
 
4642                                 u64 group_start = group->key.objectid;
 
4643                                 ret = relocate_data_extent(reloc_inode,
 
4652                         level = ref_path->owner_objectid;
 
4655                 if (prev_block != ref_path->nodes[level]) {
 
4656                         struct extent_buffer *eb;
 
4657                         u64 block_start = ref_path->nodes[level];
 
4658                         u64 block_size = btrfs_level_size(found_root, level);
 
4660                         eb = read_tree_block(found_root, block_start,
 
4662                         btrfs_tree_lock(eb);
 
4663                         BUG_ON(level != btrfs_header_level(eb));
 
4666                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
 
4668                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
 
4670                         btrfs_tree_unlock(eb);
 
4671                         free_extent_buffer(eb);
 
4672                         prev_block = block_start;
 
4675                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
 
4678                          * use fallback method to process the remaining
 
4682                                 u64 group_start = group->key.objectid;
 
4683                                 new_extents = kmalloc(sizeof(*new_extents),
 
4686                                 ret = get_new_locations(reloc_inode,
 
4694                         btrfs_record_root_in_trans(found_root);
 
4695                         ret = replace_one_extent(trans, found_root,
 
4697                                                 &first_key, ref_path,
 
4698                                                 new_extents, nr_extents);
 
4704                 btrfs_record_root_in_trans(found_root);
 
4705                 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 
4706                         ret = relocate_tree_block(trans, found_root, path,
 
4707                                                   &first_key, ref_path);
 
4710                          * try to update data extent references while
 
4711                          * keeping metadata shared between snapshots.
 
4713                         ret = relocate_one_path(trans, found_root, path,
 
4714                                                 &first_key, ref_path,
 
4715                                                 group, reloc_inode);
 
4722         btrfs_end_transaction(trans, extent_root);
 
4728 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 
4731         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
 
4732                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
4734         num_devices = root->fs_info->fs_devices->num_devices;
 
4735         if (num_devices == 1) {
 
4736                 stripped |= BTRFS_BLOCK_GROUP_DUP;
 
4737                 stripped = flags & ~stripped;
 
4739                 /* turn raid0 into single device chunks */
 
4740                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
 
4743                 /* turn mirroring into duplication */
 
4744                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
 
4745                              BTRFS_BLOCK_GROUP_RAID10))
 
4746                         return stripped | BTRFS_BLOCK_GROUP_DUP;
 
4749                 /* they already had raid on here, just return */
 
4750                 if (flags & stripped)
 
4753                 stripped |= BTRFS_BLOCK_GROUP_DUP;
 
4754                 stripped = flags & ~stripped;
 
4756                 /* switch duplicated blocks with raid1 */
 
4757                 if (flags & BTRFS_BLOCK_GROUP_DUP)
 
4758                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
 
4760                 /* turn single device chunks into raid0 */
 
4761                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
 
4766 int __alloc_chunk_for_shrink(struct btrfs_root *root,
 
4767                      struct btrfs_block_group_cache *shrink_block_group,
 
4770         struct btrfs_trans_handle *trans;
 
4771         u64 new_alloc_flags;
 
4774         spin_lock(&shrink_block_group->lock);
 
4775         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
 
4776                 spin_unlock(&shrink_block_group->lock);
 
4778                 trans = btrfs_start_transaction(root, 1);
 
4779                 spin_lock(&shrink_block_group->lock);
 
4781                 new_alloc_flags = update_block_group_flags(root,
 
4782                                                    shrink_block_group->flags);
 
4783                 if (new_alloc_flags != shrink_block_group->flags) {
 
4785                              btrfs_block_group_used(&shrink_block_group->item);
 
4787                         calc = shrink_block_group->key.offset;
 
4789                 spin_unlock(&shrink_block_group->lock);
 
4791                 do_chunk_alloc(trans, root->fs_info->extent_root,
 
4792                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
 
4794                 btrfs_end_transaction(trans, root);
 
4796                 spin_unlock(&shrink_block_group->lock);
 
4800 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 
4801                                  struct btrfs_root *root,
 
4802                                  u64 objectid, u64 size)
 
4804         struct btrfs_path *path;
 
4805         struct btrfs_inode_item *item;
 
4806         struct extent_buffer *leaf;
 
4809         path = btrfs_alloc_path();
 
4813         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
 
4817         leaf = path->nodes[0];
 
4818         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
 
4819         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
 
4820         btrfs_set_inode_generation(leaf, item, 1);
 
4821         btrfs_set_inode_size(leaf, item, size);
 
4822         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
 
4823         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM |
 
4824                                           BTRFS_INODE_NOCOMPRESS);
 
4825         btrfs_mark_buffer_dirty(leaf);
 
4826         btrfs_release_path(root, path);
 
4828         btrfs_free_path(path);
 
4832 static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
 
4833                                         struct btrfs_block_group_cache *group)
 
4835         struct inode *inode = NULL;
 
4836         struct btrfs_trans_handle *trans;
 
4837         struct btrfs_root *root;
 
4838         struct btrfs_key root_key;
 
4839         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
 
4842         root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
 
4843         root_key.type = BTRFS_ROOT_ITEM_KEY;
 
4844         root_key.offset = (u64)-1;
 
4845         root = btrfs_read_fs_root_no_name(fs_info, &root_key);
 
4847                 return ERR_CAST(root);
 
4849         trans = btrfs_start_transaction(root, 1);
 
4852         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
 
4856         err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
 
4859         err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
 
4860                                        group->key.offset, 0, group->key.offset,
 
4864         inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
 
4865         if (inode->i_state & I_NEW) {
 
4866                 BTRFS_I(inode)->root = root;
 
4867                 BTRFS_I(inode)->location.objectid = objectid;
 
4868                 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
 
4869                 BTRFS_I(inode)->location.offset = 0;
 
4870                 btrfs_read_locked_inode(inode);
 
4871                 unlock_new_inode(inode);
 
4872                 BUG_ON(is_bad_inode(inode));
 
4877         err = btrfs_orphan_add(trans, inode);
 
4879         btrfs_end_transaction(trans, root);
 
4883                 inode = ERR_PTR(err);
 
4888 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
 
4890         struct btrfs_trans_handle *trans;
 
4891         struct btrfs_path *path;
 
4892         struct btrfs_fs_info *info = root->fs_info;
 
4893         struct extent_buffer *leaf;
 
4894         struct inode *reloc_inode;
 
4895         struct btrfs_block_group_cache *block_group;
 
4896         struct btrfs_key key;
 
4905         root = root->fs_info->extent_root;
 
4907         block_group = btrfs_lookup_block_group(info, group_start);
 
4908         BUG_ON(!block_group);
 
4910         printk("btrfs relocating block group %llu flags %llu\n",
 
4911                (unsigned long long)block_group->key.objectid,
 
4912                (unsigned long long)block_group->flags);
 
4914         path = btrfs_alloc_path();
 
4917         reloc_inode = create_reloc_inode(info, block_group);
 
4918         BUG_ON(IS_ERR(reloc_inode));
 
4920         __alloc_chunk_for_shrink(root, block_group, 1);
 
4921         block_group->ro = 1;
 
4922         block_group->space_info->total_bytes -= block_group->key.offset;
 
4924         btrfs_start_delalloc_inodes(info->tree_root);
 
4925         btrfs_wait_ordered_extents(info->tree_root, 0);
 
4930         key.objectid = block_group->key.objectid;
 
4933         cur_byte = key.objectid;
 
4935         trans = btrfs_start_transaction(info->tree_root, 1);
 
4936         btrfs_commit_transaction(trans, info->tree_root);
 
4938         mutex_lock(&root->fs_info->cleaner_mutex);
 
4939         btrfs_clean_old_snapshots(info->tree_root);
 
4940         btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
 
4941         mutex_unlock(&root->fs_info->cleaner_mutex);
 
4944                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 
4948                 leaf = path->nodes[0];
 
4949                 nritems = btrfs_header_nritems(leaf);
 
4950                 if (path->slots[0] >= nritems) {
 
4951                         ret = btrfs_next_leaf(root, path);
 
4958                         leaf = path->nodes[0];
 
4959                         nritems = btrfs_header_nritems(leaf);
 
4962                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
4964                 if (key.objectid >= block_group->key.objectid +
 
4965                     block_group->key.offset)
 
4968                 if (progress && need_resched()) {
 
4969                         btrfs_release_path(root, path);
 
4976                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
 
4977                     key.objectid + key.offset <= cur_byte) {
 
4983                 cur_byte = key.objectid + key.offset;
 
4984                 btrfs_release_path(root, path);
 
4986                 __alloc_chunk_for_shrink(root, block_group, 0);
 
4987                 ret = relocate_one_extent(root, path, &key, block_group,
 
4993                 key.objectid = cur_byte;
 
4998         btrfs_release_path(root, path);
 
5001                 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
 
5002                 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
 
5003                 WARN_ON(reloc_inode->i_mapping->nrpages);
 
5006         if (total_found > 0) {
 
5007                 printk("btrfs found %llu extents in pass %d\n",
 
5008                        (unsigned long long)total_found, pass);
 
5010                 if (total_found == skipped && pass > 2) {
 
5012                         reloc_inode = create_reloc_inode(info, block_group);
 
5018         /* delete reloc_inode */
 
5021         /* unpin extents in this range */
 
5022         trans = btrfs_start_transaction(info->tree_root, 1);
 
5023         btrfs_commit_transaction(trans, info->tree_root);
 
5025         spin_lock(&block_group->lock);
 
5026         WARN_ON(block_group->pinned > 0);
 
5027         WARN_ON(block_group->reserved > 0);
 
5028         WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
 
5029         spin_unlock(&block_group->lock);
 
5032         btrfs_free_path(path);
 
5036 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
 
5037                            struct btrfs_key *key)
 
5040         struct btrfs_key found_key;
 
5041         struct extent_buffer *leaf;
 
5044         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
 
5049                 slot = path->slots[0];
 
5050                 leaf = path->nodes[0];
 
5051                 if (slot >= btrfs_header_nritems(leaf)) {
 
5052                         ret = btrfs_next_leaf(root, path);
 
5059                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
5061                 if (found_key.objectid >= key->objectid &&
 
5062                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
 
5073 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 
5075         struct btrfs_block_group_cache *block_group;
 
5078         spin_lock(&info->block_group_cache_lock);
 
5079         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
 
5080                 block_group = rb_entry(n, struct btrfs_block_group_cache,
 
5082                 rb_erase(&block_group->cache_node,
 
5083                          &info->block_group_cache_tree);
 
5084                 spin_unlock(&info->block_group_cache_lock);
 
5086                 btrfs_remove_free_space_cache(block_group);
 
5087                 down_write(&block_group->space_info->groups_sem);
 
5088                 list_del(&block_group->list);
 
5089                 up_write(&block_group->space_info->groups_sem);
 
5092                 spin_lock(&info->block_group_cache_lock);
 
5094         spin_unlock(&info->block_group_cache_lock);
 
5098 int btrfs_read_block_groups(struct btrfs_root *root)
 
5100         struct btrfs_path *path;
 
5102         struct btrfs_block_group_cache *cache;
 
5103         struct btrfs_fs_info *info = root->fs_info;
 
5104         struct btrfs_space_info *space_info;
 
5105         struct btrfs_key key;
 
5106         struct btrfs_key found_key;
 
5107         struct extent_buffer *leaf;
 
5109         root = info->extent_root;
 
5112         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
 
5113         path = btrfs_alloc_path();
 
5118                 ret = find_first_block_group(root, path, &key);
 
5126                 leaf = path->nodes[0];
 
5127                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
5128                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
 
5134                 spin_lock_init(&cache->lock);
 
5135                 mutex_init(&cache->alloc_mutex);
 
5136                 INIT_LIST_HEAD(&cache->list);
 
5137                 read_extent_buffer(leaf, &cache->item,
 
5138                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
 
5139                                    sizeof(cache->item));
 
5140                 memcpy(&cache->key, &found_key, sizeof(found_key));
 
5142                 key.objectid = found_key.objectid + found_key.offset;
 
5143                 btrfs_release_path(root, path);
 
5144                 cache->flags = btrfs_block_group_flags(&cache->item);
 
5146                 ret = update_space_info(info, cache->flags, found_key.offset,
 
5147                                         btrfs_block_group_used(&cache->item),
 
5150                 cache->space_info = space_info;
 
5151                 down_write(&space_info->groups_sem);
 
5152                 list_add_tail(&cache->list, &space_info->block_groups);
 
5153                 up_write(&space_info->groups_sem);
 
5155                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
 
5158                 set_avail_alloc_bits(root->fs_info, cache->flags);
 
5162         btrfs_free_path(path);
 
5166 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
5167                            struct btrfs_root *root, u64 bytes_used,
 
5168                            u64 type, u64 chunk_objectid, u64 chunk_offset,
 
5172         struct btrfs_root *extent_root;
 
5173         struct btrfs_block_group_cache *cache;
 
5175         extent_root = root->fs_info->extent_root;
 
5177         root->fs_info->last_trans_new_blockgroup = trans->transid;
 
5179         cache = kzalloc(sizeof(*cache), GFP_NOFS);
 
5183         cache->key.objectid = chunk_offset;
 
5184         cache->key.offset = size;
 
5185         spin_lock_init(&cache->lock);
 
5186         mutex_init(&cache->alloc_mutex);
 
5187         INIT_LIST_HEAD(&cache->list);
 
5188         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
 
5190         btrfs_set_block_group_used(&cache->item, bytes_used);
 
5191         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
 
5192         cache->flags = type;
 
5193         btrfs_set_block_group_flags(&cache->item, type);
 
5195         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 
5196                                 &cache->space_info);
 
5198         down_write(&cache->space_info->groups_sem);
 
5199         list_add_tail(&cache->list, &cache->space_info->block_groups);
 
5200         up_write(&cache->space_info->groups_sem);
 
5202         ret = btrfs_add_block_group_cache(root->fs_info, cache);
 
5205         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
 
5206                                 sizeof(cache->item));
 
5209         finish_current_insert(trans, extent_root, 0);
 
5210         ret = del_pending_extents(trans, extent_root, 0);
 
5212         set_avail_alloc_bits(extent_root->fs_info, type);
 
5217 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
5218                              struct btrfs_root *root, u64 group_start)
 
5220         struct btrfs_path *path;
 
5221         struct btrfs_block_group_cache *block_group;
 
5222         struct btrfs_key key;
 
5225         root = root->fs_info->extent_root;
 
5227         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
 
5228         BUG_ON(!block_group);
 
5230         memcpy(&key, &block_group->key, sizeof(key));
 
5232         path = btrfs_alloc_path();
 
5235         btrfs_remove_free_space_cache(block_group);
 
5236         rb_erase(&block_group->cache_node,
 
5237                  &root->fs_info->block_group_cache_tree);
 
5238         down_write(&block_group->space_info->groups_sem);
 
5239         list_del(&block_group->list);
 
5240         up_write(&block_group->space_info->groups_sem);
 
5243         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
 
5244         kfree(shrink_block_group);
 
5247         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 
5253         ret = btrfs_del_item(trans, root, path);
 
5255         btrfs_free_path(path);