2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
19 #include <linux/sched.h>
23 #include "print-tree.h"
24 #include "transaction.h"
26 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
27 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
28 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
30 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
31 btrfs_root *extent_root);
32 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
33 btrfs_root *extent_root);
/*
 * cache_block_group: walk the extent tree across one block group and record
 * every gap between allocated extents into fs_info->free_space_cache as an
 * EXTENT_DIRTY range, then mark the group as cached.
 * (Partial listing -- interior lines are absent from this capture.)
 */
35 static int cache_block_group(struct btrfs_root *root,
36 struct btrfs_block_group_cache *block_group)
38 struct btrfs_path *path;
41 struct extent_buffer *leaf;
42 struct extent_map_tree *free_space_cache;
/* extent items always live in the dedicated extent root */
52 root = root->fs_info->extent_root;
53 free_space_cache = &root->fs_info->free_space_cache;
/* already scanned once: nothing to do (early-out body not visible here) */
55 if (block_group->cached)
58 path = btrfs_alloc_path();
63 first_free = block_group->key.objectid;
64 key.objectid = block_group->key.objectid;
67 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
68 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
73 if (ret && path->slots[0] > 0)
77 leaf = path->nodes[0];
78 slot = path->slots[0];
/* ran off the current leaf: advance to the next one */
79 if (slot >= btrfs_header_nritems(leaf)) {
80 ret = btrfs_next_leaf(root, path);
90 btrfs_item_key_to_cpu(leaf, &key, slot);
/* keys before the group's start only push first_free forward */
91 if (key.objectid < block_group->key.objectid) {
92 if (key.objectid + key.offset > first_free)
93 first_free = key.objectid + key.offset;
/* key is past the end of this block group: stop scanning */
97 if (key.objectid >= block_group->key.objectid +
98 block_group->key.offset) {
102 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
/* the gap between 'last' and this extent is free space */
107 if (key.objectid > last) {
108 hole_size = key.objectid - last;
109 set_extent_dirty(free_space_cache, last,
110 last + hole_size - 1,
113 last = key.objectid + key.offset;
/* trailing hole after the final extent in the group */
121 if (block_group->key.objectid +
122 block_group->key.offset > last) {
123 hole_size = block_group->key.objectid +
124 block_group->key.offset - last;
125 set_extent_dirty(free_space_cache, last,
126 last + hole_size - 1, GFP_NOFS);
128 block_group->cached = 1;
130 btrfs_free_path(path);
/*
 * btrfs_lookup_block_group: find the cached block group covering 'bytenr'.
 * Block group structs are stashed as extent-state private data in
 * info->block_group_cache, so the lookup finds the first range tagged
 * BLOCK_GROUP_DATA/METADATA at or after bytenr and casts the private
 * pointer back.  (Partial listing -- interior lines are absent.)
 */
134 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
138 struct extent_map_tree *block_group_cache;
139 struct btrfs_block_group_cache *block_group = NULL;
145 block_group_cache = &info->block_group_cache;
146 ret = find_first_extent_bit(block_group_cache,
147 bytenr, &start, &end,
148 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
152 ret = get_state_private(block_group_cache, start, &ptr);
156 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
/* only hand back the group if bytenr really falls inside its range */
157 if (block_group->key.objectid <= bytenr && bytenr <
158 block_group->key.objectid + block_group->key.offset)
/*
 * find_search_start: scan the free_space_cache (EXTENT_DIRTY ranges) for a
 * free run of at least 'num' bytes starting at 'search_start' inside
 * *cache_ret, moving on to later block groups when the current one is
 * exhausted.  NOTE(review): partial listing; the return statements and the
 * update of *cache_ret are not visible here -- confirm against full source.
 */
162 static u64 find_search_start(struct btrfs_root *root,
163 struct btrfs_block_group_cache **cache_ret,
164 u64 search_start, int num,
165 int data, int full_scan)
168 struct btrfs_block_group_cache *cache = *cache_ret;
/* make sure this group's free space has been populated */
179 ret = cache_block_group(root, cache);
183 last = max(search_start, cache->key.objectid);
186 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
187 last, &start, &end, EXTENT_DIRTY);
194 start = max(last, start);
/* candidate range too small for the request */
196 if (last - start < num) {
197 if (last == cache->key.objectid + cache->key.offset)
/* unless mixed, an allocation may not cross the group boundary */
201 if (data != BTRFS_BLOCK_GROUP_MIXED &&
202 start + num > cache->key.objectid + cache->key.offset)
207 cache = btrfs_lookup_block_group(root->fs_info, search_start);
209 printk("Unable to find block group for %Lu\n",
/* advance past the current group and try the next one */
217 last = cache->key.objectid + cache->key.offset;
219 cache = btrfs_lookup_block_group(root->fs_info, last);
225 data = BTRFS_BLOCK_GROUP_MIXED;
/* a lookup miss on an uncached group: cache it and retry */
230 if (cache_miss && !cache->cached) {
231 cache_block_group(root, cache);
233 cache = btrfs_lookup_block_group(root->fs_info, last);
235 cache = btrfs_find_block_group(root, cache, last, data, 0);
/*
 * div_factor: scale 'num' by 'factor'.  NOTE(review): the body is not
 * visible in this listing (presumably num * factor / 10, given callers pass
 * factor as tenths) -- confirm against the full source.
 */
243 static u64 div_factor(u64 num, int factor)
/*
 * btrfs_find_block_group: pick a block group to allocate from.  Prefers, in
 * order: the group containing search_start, the caller-supplied hint, and
 * then a scan of all groups of the matching type -- accepting a group when
 * used + pinned bytes stay under a fullness threshold derived from
 * div_factor().  (Partial listing -- interior lines are absent.)
 */
252 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
253 struct btrfs_block_group_cache
254 *hint, u64 search_start,
257 struct btrfs_block_group_cache *cache;
258 struct extent_map_tree *block_group_cache;
259 struct btrfs_block_group_cache *found_group = NULL;
260 struct btrfs_fs_info *info = root->fs_info;
274 block_group_cache = &info->block_group_cache;
/* choose which extent bits identify groups of the requested type */
279 if (data == BTRFS_BLOCK_GROUP_MIXED) {
280 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
283 bit = BLOCK_GROUP_DATA;
285 bit = BLOCK_GROUP_METADATA;
/* first preference: the group that already contains search_start */
288 struct btrfs_block_group_cache *shint;
289 shint = btrfs_lookup_block_group(info, search_start);
290 if (shint && (shint->data == data ||
291 shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
292 used = btrfs_block_group_used(&shint->item);
293 if (used + shint->pinned <
294 div_factor(shint->key.offset, factor)) {
/* second preference: the caller's hint, if type-compatible */
299 if (hint && (hint->data == data ||
300 hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
301 used = btrfs_block_group_used(&hint->item);
302 if (used + hint->pinned <
303 div_factor(hint->key.offset, factor)) {
306 last = hint->key.objectid + hint->key.offset;
310 hint_last = max(hint->key.objectid, search_start);
312 hint_last = search_start;
/* full scan: iterate groups via the block_group_cache extent bits */
318 ret = find_first_extent_bit(block_group_cache, last,
323 ret = get_state_private(block_group_cache, start, &ptr);
327 cache = (struct btrfs_block_group_cache *)(unsigned int)ptr;
328 last = cache->key.objectid + cache->key.offset;
329 used = btrfs_block_group_used(&cache->item);
332 free_check = cache->key.offset;
334 free_check = div_factor(cache->key.offset, factor);
335 if (used + cache->pinned < free_check) {
/* widen the search to both group types before giving up */
348 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
/*
 * hash_extent_ref: build a 64-bit hash of a back reference.  The high 32
 * bits are the crc32c of (root_objectid, root_generation); the low 32 bits
 * are the crc32c of (owner, owner_offset).  Each value is converted to
 * little-endian before hashing so the result is endian-stable on disk.
 */
356 static u64 hash_extent_ref(u64 root_objectid, u64 root_generation,
357 u64 owner, u64 owner_offset)
359 u32 high_crc = ~(u32)0;
360 u32 low_crc = ~(u32)0;
363 lenum = cpu_to_le64(root_objectid);
364 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
365 lenum = cpu_to_le64(root_generation);
366 high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
368 lenum = cpu_to_le64(owner);
369 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
371 lenum = cpu_to_le64(owner_offset);
372 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
374 return ((u64)high_crc << 32) | (u64)low_crc;
/*
 * insert_extent_ref: insert a btrfs_extent_ref item recording which root/
 * generation/owner holds a reference on the extent at 'bytenr'.
 * NOTE(review): the hash here is computed with btrfs_name_hash() over the
 * ref struct rather than hash_extent_ref() above -- verify against the full
 * source whether that is intentional.  The -EEXIST loop presumably probes
 * alternate key offsets on hash collision; its body is not visible here.
 */
377 int insert_extent_ref(struct btrfs_trans_handle *trans,
378 struct btrfs_root *root,
379 struct btrfs_path *path,
381 u64 root_objectid, u64 root_generation,
382 u64 owner, u64 owner_offset)
385 struct btrfs_key key;
386 struct btrfs_extent_ref ref;
387 struct extent_buffer *l;
388 struct btrfs_extent_item *item;
/* fill the on-disk ref with the caller's identity */
391 btrfs_set_stack_ref_root(&ref, root_objectid);
392 btrfs_set_stack_ref_generation(&ref, root_generation);
393 btrfs_set_stack_ref_objectid(&ref, owner);
394 btrfs_set_stack_ref_offset(&ref, owner_offset);
396 ret = btrfs_name_hash(&ref, sizeof(ref), &hash);
398 key.objectid = bytenr;
399 key.type = BTRFS_EXTENT_REF_KEY;
401 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
402 while (ret == -EEXIST) {
/*
 * btrfs_inc_extent_ref: bump the reference count stored in the extent item
 * for [bytenr, bytenr + num_bytes) in the extent tree, then flush any
 * pending inserts/deletes that accumulated for the extent root.
 * (Partial listing -- interior lines are absent.)
 */
408 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
409 struct btrfs_root *root,
410 u64 bytenr, u64 num_bytes,
411 u64 root_objectid, u64 root_generation,
412 u64 owner, u64 owner_offset)
414 struct btrfs_path *path;
416 struct btrfs_key key;
417 struct extent_buffer *l;
418 struct btrfs_extent_item *item;
421 WARN_ON(num_bytes < root->sectorsize);
422 path = btrfs_alloc_path();
426 key.objectid = bytenr;
427 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
428 key.offset = num_bytes;
429 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
/* read-modify-write the refcount in place and dirty the leaf */
438 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
439 refs = btrfs_extent_refs(l, item);
440 btrfs_set_extent_refs(l, item, refs + 1);
441 btrfs_mark_buffer_dirty(path->nodes[0]);
443 btrfs_release_path(root->fs_info->extent_root, path);
/* process work queued against the extent root while it was busy */
444 finish_current_insert(trans, root->fs_info->extent_root);
445 del_pending_extents(trans, root->fs_info->extent_root);
447 btrfs_free_path(path);
/*
 * btrfs_extent_post_op: flush deferred extent-root work -- insert extents
 * queued in extent_ins and delete those queued in pending_del.
 */
451 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
452 struct btrfs_root *root)
454 finish_current_insert(trans, root->fs_info->extent_root);
455 del_pending_extents(trans, root->fs_info->extent_root);
/*
 * lookup_extent_ref: read the current reference count of the extent item
 * for [bytenr, bytenr + num_bytes) into *refs.  A lookup miss dumps the
 * leaf and the offending block number before the (not visible) error path.
 * (Partial listing -- interior lines are absent.)
 */
459 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
460 struct btrfs_root *root, u64 bytenr,
461 u64 num_bytes, u32 *refs)
463 struct btrfs_path *path;
465 struct btrfs_key key;
466 struct extent_buffer *l;
467 struct btrfs_extent_item *item;
469 WARN_ON(num_bytes < root->sectorsize);
470 path = btrfs_alloc_path();
471 key.objectid = bytenr;
472 key.offset = num_bytes;
473 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
474 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
/* diagnostic dump when the extent item is not found */
479 btrfs_print_leaf(root, path->nodes[0]);
480 printk("failed to find block number %Lu\n", bytenr);
484 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
485 *refs = btrfs_extent_refs(l, item);
487 btrfs_free_path(path);
/*
 * btrfs_inc_root_ref: convenience wrapper that takes an extra reference on
 * the tree's own root node (remaining arguments not visible in this
 * listing).
 */
491 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
492 struct btrfs_root *root)
494 return btrfs_inc_extent_ref(trans, root, root->node->start,
/*
 * btrfs_inc_ref: take a reference on everything a tree block points to.
 * For a leaf, that is the disk extent behind every non-inline file extent
 * item; for an interior node, every child block pointer.  On failure the
 * second loop walks the already-processed items (0 .. faili) and drops the
 * references again.  (Partial listing -- interior lines are absent.)
 */
498 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
499 struct extent_buffer *buf)
503 struct btrfs_key key;
504 struct btrfs_file_extent_item *fi;
514 level = btrfs_header_level(buf);
515 nritems = btrfs_header_nritems(buf);
516 for (i = 0; i < nritems; i++) {
/* leaf path: only real (non-inline, non-hole) file extents count */
519 btrfs_item_key_to_cpu(buf, &key, i);
520 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
522 fi = btrfs_item_ptr(buf, i,
523 struct btrfs_file_extent_item);
524 if (btrfs_file_extent_type(buf, fi) ==
525 BTRFS_FILE_EXTENT_INLINE)
527 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
528 if (disk_bytenr == 0)
530 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
531 btrfs_file_extent_disk_num_bytes(buf, fi));
/* node path: reference each child block */
537 bytenr = btrfs_node_blockptr(buf, i);
538 ret = btrfs_inc_extent_ref(trans, root, bytenr,
539 btrfs_level_size(root, level - 1));
/* error unwind: undo the refs taken before item 'faili' */
549 for (i =0; i < faili; i++) {
552 btrfs_item_key_to_cpu(buf, &key, i);
553 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
555 fi = btrfs_item_ptr(buf, i,
556 struct btrfs_file_extent_item);
557 if (btrfs_file_extent_type(buf, fi) ==
558 BTRFS_FILE_EXTENT_INLINE)
560 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
561 if (disk_bytenr == 0)
563 err = btrfs_free_extent(trans, root, disk_bytenr,
564 btrfs_file_extent_disk_num_bytes(buf,
568 bytenr = btrfs_node_blockptr(buf, i);
569 err = btrfs_free_extent(trans, root, bytenr,
570 btrfs_level_size(root, level - 1), 0);
/*
 * write_one_cache_group: copy the in-memory block group item back into its
 * leaf in the extent tree and flush any deferred extent-root work that the
 * update generated.  (Partial listing -- interior lines are absent.)
 */
577 static int write_one_cache_group(struct btrfs_trans_handle *trans,
578 struct btrfs_root *root,
579 struct btrfs_path *path,
580 struct btrfs_block_group_cache *cache)
584 struct btrfs_root *extent_root = root->fs_info->extent_root;
586 struct extent_buffer *leaf;
588 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
593 leaf = path->nodes[0];
594 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
/* overwrite the on-disk item with the updated in-memory copy */
595 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
596 btrfs_mark_buffer_dirty(leaf);
597 btrfs_release_path(extent_root, path);
599 finish_current_insert(trans, extent_root);
600 pending_ret = del_pending_extents(trans, extent_root);
/*
 * btrfs_write_dirty_block_groups: loop over every range tagged
 * BLOCK_GROUP_DIRTY in the block group cache, write each group's item to
 * disk, and clear the dirty bit only on success so failed groups are
 * retried later.  (Partial listing -- interior lines are absent.)
 */
609 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
610 struct btrfs_root *root)
612 struct extent_map_tree *block_group_cache;
613 struct btrfs_block_group_cache *cache;
617 struct btrfs_path *path;
623 block_group_cache = &root->fs_info->block_group_cache;
624 path = btrfs_alloc_path();
629 ret = find_first_extent_bit(block_group_cache, last,
630 &start, &end, BLOCK_GROUP_DIRTY);
635 ret = get_state_private(block_group_cache, start, &ptr);
639 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
640 err = write_one_cache_group(trans, root,
643 * if we fail to write the cache group, we want
644 * to keep it marked dirty in hopes that a later
/* success path: drop the dirty tag for this group's range */
651 clear_extent_bits(block_group_cache, start, end,
652 BLOCK_GROUP_DIRTY, GFP_NOFS);
654 btrfs_free_path(path);
/*
 * update_block_group: account an allocation (alloc=1) or free (alloc=0) of
 * [bytenr, bytenr + num_bytes) against the owning block group(s): adjusts
 * the group's used byte count, marks the group dirty so it gets written
 * back, retags a mostly-empty group when the data/metadata type flips, and
 * on free optionally returns the range to the free_space_cache.
 * (Partial listing -- interior lines are absent.)
 */
658 static int update_block_group(struct btrfs_trans_handle *trans,
659 struct btrfs_root *root,
660 u64 bytenr, u64 num_bytes, int alloc,
661 int mark_free, int data)
663 struct btrfs_block_group_cache *cache;
664 struct btrfs_fs_info *info = root->fs_info;
665 u64 total = num_bytes;
672 cache = btrfs_lookup_block_group(info, bytenr);
676 byte_in_group = bytenr - cache->key.objectid;
677 WARN_ON(byte_in_group > cache->key.offset);
678 start = cache->key.objectid;
679 end = start + cache->key.offset - 1;
/* group contents changed: queue it for write-back */
680 set_extent_bits(&info->block_group_cache, start, end,
681 BLOCK_GROUP_DIRTY, GFP_NOFS);
683 old_val = btrfs_block_group_used(&cache->item);
/* clamp to this group; the remainder spills into the next iteration */
684 num_bytes = min(total, cache->key.offset - byte_in_group);
/* type mismatch in a less-than-half-full group: retag the whole group */
686 if (cache->data != data &&
687 old_val < (cache->key.offset >> 1)) {
692 bit_to_clear = BLOCK_GROUP_METADATA;
693 bit_to_set = BLOCK_GROUP_DATA;
695 ~BTRFS_BLOCK_GROUP_MIXED;
697 BTRFS_BLOCK_GROUP_DATA;
699 bit_to_clear = BLOCK_GROUP_DATA;
700 bit_to_set = BLOCK_GROUP_METADATA;
702 ~BTRFS_BLOCK_GROUP_MIXED;
704 ~BTRFS_BLOCK_GROUP_DATA;
706 clear_extent_bits(&info->block_group_cache,
707 start, end, bit_to_clear,
709 set_extent_bits(&info->block_group_cache,
710 start, end, bit_to_set,
/* type mismatch in a fuller group: demote it to mixed instead */
712 } else if (cache->data != data &&
713 cache->data != BTRFS_BLOCK_GROUP_MIXED) {
714 cache->data = BTRFS_BLOCK_GROUP_MIXED;
715 set_extent_bits(&info->block_group_cache,
718 BLOCK_GROUP_METADATA,
721 old_val += num_bytes;
723 old_val -= num_bytes;
/* freed space goes back into the free space cache when requested */
725 set_extent_dirty(&info->free_space_cache,
726 bytenr, bytenr + num_bytes - 1,
730 btrfs_set_block_group_used(&cache->item, old_val);
/*
 * update_pinned_extents: pin (pin=1) or unpin (pin=0) [bytenr, bytenr+num)
 * by toggling EXTENT_DIRTY in fs_info->pinned_extents, and keep the
 * per-block-group and filesystem-wide pinned byte counters in sync, one
 * group at a time.  (Partial listing -- interior lines are absent.)
 */
736 static int update_pinned_extents(struct btrfs_root *root,
737 u64 bytenr, u64 num, int pin)
740 struct btrfs_block_group_cache *cache;
741 struct btrfs_fs_info *fs_info = root->fs_info;
744 set_extent_dirty(&fs_info->pinned_extents,
745 bytenr, bytenr + num - 1, GFP_NOFS);
747 clear_extent_dirty(&fs_info->pinned_extents,
748 bytenr, bytenr + num - 1, GFP_NOFS);
751 cache = btrfs_lookup_block_group(fs_info, bytenr);
/* limit this round of accounting to the current block group */
753 len = min(num, cache->key.offset -
754 (bytenr - cache->key.objectid));
756 cache->pinned += len;
757 fs_info->total_pinned += len;
759 cache->pinned -= len;
760 fs_info->total_pinned -= len;
/*
 * btrfs_copy_pinned: mirror every EXTENT_DIRTY range in
 * fs_info->pinned_extents into 'copy' (used to snapshot the pinned set).
 * (Partial listing -- the loop structure is not visible here.)
 */
768 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
773 struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
777 ret = find_first_extent_bit(pinned_extents, last,
778 &start, &end, EXTENT_DIRTY);
781 set_extent_dirty(copy, start, end, GFP_NOFS);
/*
 * btrfs_finish_extent_commit: after a commit, walk the 'unpin' tree and for
 * each range: drop the pinned accounting, clear it from 'unpin', and return
 * the bytes to the free space cache.  (Partial listing.)
 */
787 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
788 struct btrfs_root *root,
789 struct extent_map_tree *unpin)
794 struct extent_map_tree *free_space_cache;
795 free_space_cache = &root->fs_info->free_space_cache;
798 ret = find_first_extent_bit(unpin, 0, &start, &end,
802 update_pinned_extents(root, start, end + 1 - start, 0);
803 clear_extent_dirty(unpin, start, end, GFP_NOFS);
804 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
/*
 * finish_current_insert: materialize deferred extent-item inserts.  Each
 * EXTENT_LOCKED range in info->extent_ins becomes one extent item (refs=1,
 * owned by the extent root) and is then cleared from the queue.
 * (Partial listing -- loop structure not visible here.)
 */
809 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
810 btrfs_root *extent_root)
812 struct btrfs_key ins;
813 struct btrfs_extent_item extent_item;
818 struct btrfs_fs_info *info = extent_root->fs_info;
820 btrfs_set_stack_extent_refs(&extent_item, 1);
821 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
822 btrfs_set_stack_extent_owner(&extent_item,
823 extent_root->root_key.objectid);
826 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
827 &end, EXTENT_LOCKED);
/* one queued range maps to one extent item [start, end] */
831 ins.objectid = start;
832 ins.offset = end + 1 - start;
833 err = btrfs_insert_item(trans, extent_root, &ins,
834 &extent_item, sizeof(extent_item));
835 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
/*
 * pin_down_bytes: keep a freed tree block from being reused until it is
 * safe.  A block created in the currently running transaction can be
 * recycled immediately (the generation check); otherwise the range is
 * pinned, or queued in pending_del for the extent root's deferred path.
 * (Partial listing -- interior lines are absent.)
 */
841 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
845 struct extent_buffer *buf;
848 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
850 if (btrfs_buffer_uptodate(buf)) {
852 root->fs_info->running_transaction->transid;
/* block born in this transaction: no pinning needed */
853 if (btrfs_header_generation(buf) == transid) {
854 free_extent_buffer(buf);
858 free_extent_buffer(buf);
860 update_pinned_extents(root, bytenr, num_bytes, 1);
862 set_extent_bits(&root->fs_info->pending_del,
863 bytenr, bytenr + num_bytes - 1,
864 EXTENT_LOCKED, GFP_NOFS);
871 * remove an extent from the root, returns 0 on success
/*
 * __free_extent: drop one reference on the extent item for
 * [bytenr, bytenr + num_bytes).  When the refcount is still positive the
 * item is just rewritten; when it reaches zero (path partially visible
 * here) the bytes are pinned, super/root byte accounting is reduced, the
 * item is deleted and the block group usage updated.
 * (Partial listing -- interior lines are absent.)
 */
873 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
874 *root, u64 bytenr, u64 num_bytes, int pin,
877 struct btrfs_path *path;
878 struct btrfs_key key;
879 struct btrfs_fs_info *info = root->fs_info;
880 struct btrfs_root *extent_root = info->extent_root;
881 struct extent_buffer *leaf;
883 struct btrfs_extent_item *ei;
886 key.objectid = bytenr;
887 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
888 key.offset = num_bytes;
890 path = btrfs_alloc_path();
894 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
899 leaf = path->nodes[0];
900 ei = btrfs_item_ptr(leaf, path->slots[0],
901 struct btrfs_extent_item);
902 refs = btrfs_extent_refs(leaf, ei);
/* still referenced elsewhere: store the decremented count and dirty */
905 btrfs_set_extent_refs(leaf, ei, refs);
906 btrfs_mark_buffer_dirty(leaf);
913 ret = pin_down_bytes(root, bytenr, num_bytes, 0);
919 /* block accounting for super block */
920 super_used = btrfs_super_bytes_used(&info->super_copy);
921 btrfs_set_super_bytes_used(&info->super_copy,
922 super_used - num_bytes);
924 /* block accounting for root item */
925 root_used = btrfs_root_used(&root->root_item);
926 btrfs_set_root_used(&root->root_item,
927 root_used - num_bytes);
/* last reference gone: remove the extent item entirely */
929 ret = btrfs_del_item(trans, extent_root, path);
933 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
937 btrfs_free_path(path);
938 finish_current_insert(trans, extent_root);
943 * find all the blocks marked as pending in the radix tree and remove
944 * them from the extent map
/*
 * del_pending_extents: drain fs_info->pending_del.  Each EXTENT_LOCKED
 * range is pinned, cleared from the queue, and handed to __free_extent.
 * This indirection exists because frees against the extent root itself
 * must be deferred (see btrfs_free_extent below).
 * (Partial listing -- loop structure not visible here.)
 */
946 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
947 btrfs_root *extent_root)
953 struct extent_map_tree *pending_del;
954 struct extent_map_tree *pinned_extents;
956 pending_del = &extent_root->fs_info->pending_del;
957 pinned_extents = &extent_root->fs_info->pinned_extents;
960 ret = find_first_extent_bit(pending_del, 0, &start, &end,
964 update_pinned_extents(extent_root, start, end + 1 - start, 1);
965 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
967 ret = __free_extent(trans, extent_root,
968 start, end + 1 - start, 0, 0);
976 * remove an extent from the root, returns 0 on success
/*
 * btrfs_free_extent: public entry point for freeing an extent.  Frees
 * against the extent root itself are only queued via pin_down_bytes (to
 * avoid recursing into the extent tree mid-modification); everything else
 * goes straight through __free_extent plus a drain of the pending queue.
 */
978 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
979 *root, u64 bytenr, u64 num_bytes, int pin)
981 struct btrfs_root *extent_root = root->fs_info->extent_root;
985 WARN_ON(num_bytes < root->sectorsize);
986 if (root == extent_root) {
987 pin_down_bytes(root, bytenr, num_bytes, 1);
990 ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
991 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
992 return ret ? ret : pending_ret;
/*
 * stripe_align: round 'val' up to the next root->stripesize boundary
 * (stripesize is presumably a power of two for this mask trick to hold --
 * confirm against full source).
 */
995 static u64 stripe_align(struct btrfs_root *root, u64 val)
997 u64 mask = ((u64)root->stripesize - 1);
998 u64 ret = (val + mask) & ~mask;
1003 * walks the btree of allocated extents and find a hole of a given size.
1004 * The key ins is changed to record the hole:
1005 * ins->objectid == block start
1006 * ins->flags = BTRFS_EXTENT_ITEM_KEY
1007 * ins->offset == number of blocks
1008 * Any available blocks before search_start are skipped.
/*
 * NOTE(review): heavily abridged capture -- the main scan loop, restart
 * labels and error paths are largely invisible here, so only the visible
 * statements are annotated below.
 */
1010 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1011 *orig_root, u64 num_bytes, u64 empty_size,
1012 u64 search_start, u64 search_end, u64 hint_byte,
1013 struct btrfs_key *ins, u64 exclude_start,
1014 u64 exclude_nr, int data)
1016 struct btrfs_path *path;
1017 struct btrfs_key key;
1023 u64 orig_search_start = search_start;
1025 struct extent_buffer *l;
/* the search is always performed against the extent root */
1026 struct btrfs_root * root = orig_root->fs_info->extent_root;
1027 struct btrfs_fs_info *info = root->fs_info;
1028 u64 total_needed = num_bytes;
1030 struct btrfs_block_group_cache *block_group;
1035 WARN_ON(num_bytes < root->sectorsize);
1036 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1038 level = btrfs_header_level(root->node);
/* very large allocations fall back to mixed-type groups */
1040 if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1041 data = BTRFS_BLOCK_GROUP_MIXED;
1044 if (search_end == (u64)-1)
1045 search_end = btrfs_super_total_bytes(&info->super_copy);
1047 block_group = btrfs_lookup_block_group(info, hint_byte);
1049 hint_byte = search_start;
1050 block_group = btrfs_find_block_group(root, block_group,
1051 hint_byte, data, 1);
1053 block_group = btrfs_find_block_group(root,
1055 search_start, data, 1);
1058 total_needed += empty_size;
1059 path = btrfs_alloc_path();
/* narrow the search using the free space cache, then stripe-align */
1061 search_start = find_search_start(root, &block_group, search_start,
1062 total_needed, data, full_scan);
1063 search_start = stripe_align(root, search_start);
1064 cached_start = search_start;
1065 btrfs_init_path(path);
1066 ins->objectid = search_start;
1071 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1075 if (path->slots[0] > 0) {
1080 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1083 * a rare case, go back one key if we hit a block group item
1084 * instead of an extent item
1086 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1087 key.objectid + key.offset >= search_start) {
1088 ins->objectid = key.objectid;
1089 ins->offset = key.offset - 1;
1090 btrfs_release_path(root, path);
1091 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1095 if (path->slots[0] > 0) {
1102 slot = path->slots[0];
/* exhausted this leaf: either advance or take the tail-of-tree hole */
1103 if (slot >= btrfs_header_nritems(l)) {
1104 ret = btrfs_next_leaf(root, path);
1110 search_start = max(search_start,
1111 block_group->key.objectid);
1113 aligned = stripe_align(root, search_start);
1114 ins->objectid = aligned;
1115 if (aligned >= search_end) {
1119 ins->offset = search_end - aligned;
1123 ins->objectid = stripe_align(root,
1124 last_byte > search_start ?
1125 last_byte : search_start);
1126 if (search_end <= ins->objectid) {
1130 ins->offset = search_end - ins->objectid;
1131 BUG_ON(ins->objectid >= search_end);
1134 btrfs_item_key_to_cpu(l, &key, slot);
/* hole between the previous extent and this key: try to use it */
1136 if (key.objectid >= search_start && key.objectid > last_byte &&
1138 if (last_byte < search_start)
1139 last_byte = search_start;
1140 aligned = stripe_align(root, last_byte);
1141 hole_size = key.objectid - aligned;
1142 if (key.objectid > aligned && hole_size >= num_bytes) {
1143 ins->objectid = aligned;
1144 ins->offset = hole_size;
1148 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1150 last_byte = key.objectid;
1158 last_byte = key.objectid + key.offset;
/* non-mixed allocations must not walk past the group boundary */
1160 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1161 last_byte >= block_group->key.objectid +
1162 block_group->key.offset) {
1163 btrfs_release_path(root, path);
1164 search_start = block_group->key.objectid +
1165 block_group->key.offset;
1173 /* we have to make sure we didn't find an extent that has already
1174 * been allocated by the map tree or the original allocation
1176 btrfs_release_path(root, path);
1177 BUG_ON(ins->objectid < search_start);
1179 if (ins->objectid + num_bytes >= search_end)
1181 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1182 ins->objectid + num_bytes > block_group->
1183 key.objectid + block_group->key.offset) {
1184 search_start = block_group->key.objectid +
1185 block_group->key.offset;
/* reject candidates overlapping queued inserts or pinned bytes */
1188 if (test_range_bit(&info->extent_ins, ins->objectid,
1189 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1190 search_start = ins->objectid + num_bytes;
1193 if (test_range_bit(&info->pinned_extents, ins->objectid,
1194 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1195 search_start = ins->objectid + num_bytes;
/* respect the caller's exclusion window (e.g. in-flight allocation) */
1198 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1199 ins->objectid < exclude_start + exclude_nr)) {
1200 search_start = exclude_start + exclude_nr;
1204 block_group = btrfs_lookup_block_group(info, ins->objectid);
1206 trans->block_group = block_group;
1208 ins->offset = num_bytes;
1209 btrfs_free_path(path);
/* wrap-around / retry path: relax constraints and scan again */
1213 if (search_start + num_bytes >= search_end) {
1215 search_start = orig_search_start;
1222 total_needed -= empty_size;
1224 data = BTRFS_BLOCK_GROUP_MIXED;
1228 block_group = btrfs_lookup_block_group(info, search_start);
1230 block_group = btrfs_find_block_group(root, block_group,
1231 search_start, data, 0);
1235 btrfs_release_path(root, path);
1236 btrfs_free_path(path);
1240 * finds a free extent and does all the dirty work required for allocation
1241 * returns the key for the extent through ins, and a tree buffer for
1242 * the first block of the extent through buf.
1244 * returns 0 if everything worked, non-zero otherwise.
/*
 * (Partial listing -- interior lines are absent; only visible statements
 * are annotated.)
 */
1246 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1247 struct btrfs_root *root, u64 owner,
1248 u64 num_bytes, u64 empty_size, u64 hint_byte,
1249 u64 search_end, struct btrfs_key *ins, int data)
1253 u64 super_used, root_used;
1254 u64 search_start = 0;
1255 struct btrfs_fs_info *info = root->fs_info;
1256 struct btrfs_root *extent_root = info->extent_root;
1257 struct btrfs_extent_item extent_item;
1259 btrfs_set_stack_extent_refs(&extent_item, 1);
1260 btrfs_set_stack_extent_owner(&extent_item, owner);
1262 WARN_ON(num_bytes < root->sectorsize);
1263 ret = find_free_extent(trans, root, num_bytes, empty_size,
1264 search_start, search_end, hint_byte, ins,
1265 trans->alloc_exclude_start,
1266 trans->alloc_exclude_nr, data);
1271 /* block accounting for super block */
1272 super_used = btrfs_super_bytes_used(&info->super_copy);
1273 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1275 /* block accounting for root item */
1276 root_used = btrfs_root_used(&root->root_item);
1277 btrfs_set_root_used(&root->root_item, root_used + num_bytes);
/* the chosen range is no longer free space */
1279 clear_extent_dirty(&root->fs_info->free_space_cache,
1280 ins->objectid, ins->objectid + ins->offset - 1,
/* allocations from the extent root itself get queued, not inserted */
1283 if (root == extent_root) {
1284 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1285 ins->objectid + ins->offset - 1,
1286 EXTENT_LOCKED, GFP_NOFS);
/* exclude this in-flight range from recursive allocations */
1291 WARN_ON(trans->alloc_exclude_nr);
1292 trans->alloc_exclude_start = ins->objectid;
1293 trans->alloc_exclude_nr = ins->offset;
1294 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1295 sizeof(extent_item));
1297 trans->alloc_exclude_start = 0;
1298 trans->alloc_exclude_nr = 0;
1301 finish_current_insert(trans, extent_root);
1302 pending_ret = del_pending_extents(trans, extent_root);
1312 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1319 * helper function to allocate a block for a given tree
1320 * returns the tree buffer or NULL.
/*
 * (Partial listing.)  Allocates one tree block via btrfs_alloc_extent,
 * wraps it in an extent_buffer, marks it uptodate/dirty for the running
 * transaction, and tags it for checksumming and defrag.  Failure to get a
 * buffer frees the extent again and returns ERR_PTR(-ENOMEM).
 */
1322 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1323 struct btrfs_root *root,
1324 u32 blocksize, u64 hint,
1327 struct btrfs_key ins;
1329 struct extent_buffer *buf;
1331 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1332 blocksize, empty_size, hint,
1336 return ERR_PTR(ret);
1338 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
/* couldn't get a buffer: undo the allocation before bailing */
1340 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1341 return ERR_PTR(-ENOMEM);
1343 btrfs_set_buffer_uptodate(buf);
1344 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1345 buf->start + buf->len - 1, GFP_NOFS);
1346 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1347 buf->start, buf->start + buf->len - 1,
1348 EXTENT_CSUM, GFP_NOFS);
1349 buf->flags |= EXTENT_CSUM;
1350 btrfs_set_buffer_defrag(buf);
1351 trans->blocks_used++;
/*
 * drop_leaf_ref: release the data extents referenced by one leaf -- for
 * every regular (non-inline, non-hole) file extent item, free its disk
 * extent.  Used while tearing down a snapshot.
 * (Partial listing -- interior lines are absent.)
 */
1355 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1356 struct btrfs_root *root, struct extent_buffer *leaf)
1358 struct btrfs_key key;
1359 struct btrfs_file_extent_item *fi;
1364 BUG_ON(!btrfs_is_leaf(leaf));
1365 nritems = btrfs_header_nritems(leaf);
1366 for (i = 0; i < nritems; i++) {
1369 btrfs_item_key_to_cpu(leaf, &key, i);
1370 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1372 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
/* inline extents live inside the leaf -- nothing external to free */
1373 if (btrfs_file_extent_type(leaf, fi) ==
1374 BTRFS_FILE_EXTENT_INLINE)
1377 * FIXME make sure to insert a trans record that
1378 * repeats the snapshot del on crash
1380 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
/* bytenr 0 marks a hole -- skip */
1381 if (disk_bytenr == 0)
1383 ret = btrfs_free_extent(trans, root, disk_bytenr,
1384 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
/*
 * reada_walk_down: issue readahead for the children of 'node' ahead of the
 * snapshot-drop walk.  fs_mutex is dropped around the readahead call so
 * other work can proceed while I/O is queued.
 * (Partial listing -- interior lines are absent.)
 */
1390 static void reada_walk_down(struct btrfs_root *root,
1391 struct extent_buffer *node)
1401 nritems = btrfs_header_nritems(node);
1402 level = btrfs_header_level(node);
1403 for (i = 0; i < nritems; i++) {
1404 bytenr = btrfs_node_blockptr(node, i);
1405 blocksize = btrfs_level_size(root, level - 1);
/* refs are checked first -- presumably to skip shared subtrees */
1406 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1410 mutex_unlock(&root->fs_info->fs_mutex);
1411 ret = readahead_tree_block(root, bytenr, blocksize);
1413 mutex_lock(&root->fs_info->fs_mutex);
1420 * helper function for drop_snapshot, this walks down the tree dropping ref
1421 * counts as it goes.
/*
 * (Partial listing.)  Descends from path->nodes[*level], freeing each child
 * whose refcount is >1 (just drop the ref and move on) and recursing into
 * children with a single ref.  Because fs_mutex is dropped around
 * read_tree_block, the refcount is re-checked afterwards.
 */
1423 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1424 *root, struct btrfs_path *path, int *level)
1426 struct extent_buffer *next;
1427 struct extent_buffer *cur;
1433 WARN_ON(*level < 0);
1434 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1435 ret = lookup_extent_ref(trans, root,
1436 path->nodes[*level]->start,
1437 path->nodes[*level]->len, &refs);
1443 * walk down to the last node level and free all the leaves
1445 while(*level >= 0) {
1446 WARN_ON(*level < 0);
1447 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1448 cur = path->nodes[*level];
/* prefetch children once per node, on first entry */
1450 if (*level > 0 && path->slots[*level] == 0)
1451 reada_walk_down(root, cur);
1453 if (btrfs_header_level(cur) != *level)
1456 if (path->slots[*level] >=
1457 btrfs_header_nritems(cur))
/* at a leaf: release the data extents it references */
1460 ret = drop_leaf_ref(trans, root, cur);
1464 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1465 blocksize = btrfs_level_size(root, *level - 1);
1466 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
/* shared child: drop our ref and skip the whole subtree */
1469 path->slots[*level]++;
1470 ret = btrfs_free_extent(trans, root, bytenr,
1475 next = btrfs_find_tree_block(root, bytenr, blocksize);
1476 if (!next || !btrfs_buffer_uptodate(next)) {
1477 free_extent_buffer(next);
1478 mutex_unlock(&root->fs_info->fs_mutex);
1479 next = read_tree_block(root, bytenr, blocksize);
1480 mutex_lock(&root->fs_info->fs_mutex);
1482 /* we dropped the lock, check one more time */
1483 ret = lookup_extent_ref(trans, root, bytenr,
1487 path->slots[*level]++;
1488 free_extent_buffer(next);
1489 ret = btrfs_free_extent(trans, root,
1490 bytenr, blocksize, 1);
/* descend: hang 'next' on the path one level down */
1495 WARN_ON(*level <= 0);
1496 if (path->nodes[*level-1])
1497 free_extent_buffer(path->nodes[*level-1]);
1498 path->nodes[*level-1] = next;
1499 *level = btrfs_header_level(next);
1500 path->slots[*level] = 0;
/* done with this node: free its block and pop it off the path */
1503 WARN_ON(*level < 0);
1504 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1505 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1506 path->nodes[*level]->len, 1);
1507 free_extent_buffer(path->nodes[*level]);
1508 path->nodes[*level] = NULL;
1515 * helper for dropping snapshots. This walks back up the tree in the path
1516 * to find the first node higher up where we haven't yet gone through
/*
 * (Partial listing.)  Pops finished levels: a level with remaining slots
 * records resume state in root_item->drop_progress/drop_level (so a crash
 * or preemption can restart there); a fully-consumed level has its block
 * freed and its path entry cleared.
 */
1519 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1520 *root, struct btrfs_path *path, int *level)
1525 struct btrfs_root_item *root_item = &root->root_item;
1527 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1528 slot = path->slots[i];
/* more siblings to visit at this level: save progress and stop */
1529 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1530 struct extent_buffer *node;
1531 struct btrfs_disk_key disk_key;
1532 node = path->nodes[i];
1535 WARN_ON(*level == 0);
1536 btrfs_node_key(node, &disk_key, path->slots[i]);
1537 memcpy(&root_item->drop_progress,
1538 &disk_key, sizeof(disk_key));
1539 root_item->drop_level = i;
/* level fully consumed: free its block and keep climbing */
1542 ret = btrfs_free_extent(trans, root,
1543 path->nodes[*level]->start,
1544 path->nodes[*level]->len, 1);
1546 free_extent_buffer(path->nodes[*level]);
1547 path->nodes[*level] = NULL;
1555 * drop the reference count on the tree rooted at 'snap'. This traverses
1556 * the tree freeing any blocks that have a ref count of zero after being
/*
 * (Partial listing.)  Entry point for deleting a snapshot: resumes from
 * root_item->drop_progress if set (otherwise starts at the tree root),
 * then alternates walk_down_tree / walk_up_tree until the tree is
 * consumed, finally releasing any buffers still held in the path.
 */
1559 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1565 struct btrfs_path *path;
1568 struct btrfs_root_item *root_item = &root->root_item;
1570 path = btrfs_alloc_path();
1573 level = btrfs_header_level(root->node);
/* fresh drop: start at the root node */
1575 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1576 path->nodes[level] = root->node;
1577 extent_buffer_get(root->node);
1578 path->slots[level] = 0;
/* resuming: re-seek to the saved drop_progress key/level */
1580 struct btrfs_key key;
1581 struct btrfs_disk_key found_key;
1582 struct extent_buffer *node;
1584 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1585 level = root_item->drop_level;
1586 path->lowest_level = level;
1587 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1592 node = path->nodes[level];
1593 btrfs_node_key(node, &found_key, path->slots[level]);
/* the resumed position must match what was recorded */
1594 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1595 sizeof(found_key)));
1598 wret = walk_down_tree(trans, root, path, &level);
1604 wret = walk_up_tree(trans, root, path, &level);
/* release whatever path buffers remain */
1612 for (i = 0; i <= orig_level; i++) {
1613 if (path->nodes[i]) {
1614 free_extent_buffer(path->nodes[i]);
1615 path->nodes[i] = NULL;
1619 btrfs_free_path(path);
/*
 * btrfs_free_block_groups: teardown at unmount -- kfree every block group
 * struct stored as state-private in block_group_cache and clear all its
 * bits, then clear the free space cache.  (Partial listing -- the loop
 * structure is not visible here.)
 */
1623 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1630 ret = find_first_extent_bit(&info->block_group_cache, 0,
1631 &start, &end, (unsigned int)-1);
1634 ret = get_state_private(&info->block_group_cache, start, &ptr);
1636 kfree((void *)(unsigned long)ptr);
1637 clear_extent_bits(&info->block_group_cache, start,
1638 end, (unsigned int)-1, GFP_NOFS);
1641 ret = find_first_extent_bit(&info->free_space_cache, 0,
1642 &start, &end, EXTENT_DIRTY);
1645 clear_extent_dirty(&info->free_space_cache, start,
/*
 * btrfs_read_block_groups: at mount, iterate the BLOCK_GROUP_ITEM keys in
 * the extent tree, build a btrfs_block_group_cache for each, and register
 * it in info->block_group_cache (bits select data/metadata/mixed; the
 * struct pointer is stored as state-private).  Stops once the groups cover
 * super_total_bytes.  (Partial listing -- interior lines are absent.)
 */
1651 int btrfs_read_block_groups(struct btrfs_root *root)
1653 struct btrfs_path *path;
1657 struct btrfs_block_group_cache *cache;
1658 struct btrfs_fs_info *info = root->fs_info;
1659 struct extent_map_tree *block_group_cache;
1660 struct btrfs_key key;
1661 struct btrfs_key found_key;
1662 struct extent_buffer *leaf;
1664 block_group_cache = &info->block_group_cache;
1666 root = info->extent_root;
/* groups are fixed-size at this point in btrfs's history */
1668 key.offset = BTRFS_BLOCK_GROUP_SIZE;
1669 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1671 path = btrfs_alloc_path();
1676 ret = btrfs_search_slot(NULL, info->extent_root,
1682 leaf = path->nodes[0];
1683 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1684 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1690 read_extent_buffer(leaf, &cache->item,
1691 btrfs_item_ptr_offset(leaf, path->slots[0]),
1692 sizeof(cache->item));
1693 memcpy(&cache->key, &found_key, sizeof(found_key));
/* advance the search key to just past this group */
1696 key.objectid = found_key.objectid + found_key.offset;
1697 btrfs_release_path(root, path);
/* derive the cache type and extent bits from the item flags */
1699 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
1700 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1701 cache->data = BTRFS_BLOCK_GROUP_MIXED;
1702 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1703 bit = BLOCK_GROUP_DATA;
1704 cache->data = BTRFS_BLOCK_GROUP_DATA;
1706 bit = BLOCK_GROUP_METADATA;
1710 /* use EXTENT_LOCKED to prevent merging */
1711 set_extent_bits(block_group_cache, found_key.objectid,
1712 found_key.objectid + found_key.offset - 1,
1713 bit | EXTENT_LOCKED, GFP_NOFS);
1714 set_state_private(block_group_cache, found_key.objectid,
1715 (unsigned long)cache);
1718 btrfs_super_total_bytes(&info->super_copy))
1722 btrfs_free_path(path);