Btrfs: Implement generation numbers in block pointers
[linux-2.6] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "hash.h"
21 #include "ctree.h"
22 #include "disk-io.h"
23 #include "print-tree.h"
24 #include "transaction.h"
25
26 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK
27 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
28 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
29
30 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
31                                  btrfs_root *extent_root);
32 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
33                                btrfs_root *extent_root);
34
35 static int cache_block_group(struct btrfs_root *root,
36                              struct btrfs_block_group_cache *block_group)
37 {
38         struct btrfs_path *path;
39         int ret;
40         struct btrfs_key key;
41         struct extent_buffer *leaf;
42         struct extent_map_tree *free_space_cache;
43         int slot;
44         u64 last = 0;
45         u64 hole_size;
46         u64 first_free;
47         int found = 0;
48
49         if (!block_group)
50                 return 0;
51
52         root = root->fs_info->extent_root;
53         free_space_cache = &root->fs_info->free_space_cache;
54
55         if (block_group->cached)
56                 return 0;
57
58         path = btrfs_alloc_path();
59         if (!path)
60                 return -ENOMEM;
61
62         path->reada = 2;
63         first_free = block_group->key.objectid;
64         key.objectid = block_group->key.objectid;
65         key.offset = 0;
66
67         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
68         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
69
70         if (ret < 0)
71                 return ret;
72
73         if (ret && path->slots[0] > 0)
74                 path->slots[0]--;
75
76         while(1) {
77                 leaf = path->nodes[0];
78                 slot = path->slots[0];
79                 if (slot >= btrfs_header_nritems(leaf)) {
80                         ret = btrfs_next_leaf(root, path);
81                         if (ret < 0)
82                                 goto err;
83                         if (ret == 0) {
84                                 continue;
85                         } else {
86                                 break;
87                         }
88                 }
89
90                 btrfs_item_key_to_cpu(leaf, &key, slot);
91                 if (key.objectid < block_group->key.objectid) {
92                         if (key.objectid + key.offset > first_free)
93                                 first_free = key.objectid + key.offset;
94                         goto next;
95                 }
96
97                 if (key.objectid >= block_group->key.objectid +
98                     block_group->key.offset) {
99                         break;
100                 }
101
102                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
103                         if (!found) {
104                                 last = first_free;
105                                 found = 1;
106                         }
107                         if (key.objectid > last) {
108                                 hole_size = key.objectid - last;
109                                 set_extent_dirty(free_space_cache, last,
110                                                  last + hole_size - 1,
111                                                  GFP_NOFS);
112                         }
113                         last = key.objectid + key.offset;
114                 }
115 next:
116                 path->slots[0]++;
117         }
118
119         if (!found)
120                 last = first_free;
121         if (block_group->key.objectid +
122             block_group->key.offset > last) {
123                 hole_size = block_group->key.objectid +
124                         block_group->key.offset - last;
125                 set_extent_dirty(free_space_cache, last,
126                                  last + hole_size - 1, GFP_NOFS);
127         }
128         block_group->cached = 1;
129 err:
130         btrfs_free_path(path);
131         return 0;
132 }
133
134 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
135                                                          btrfs_fs_info *info,
136                                                          u64 bytenr)
137 {
138         struct extent_map_tree *block_group_cache;
139         struct btrfs_block_group_cache *block_group = NULL;
140         u64 ptr;
141         u64 start;
142         u64 end;
143         int ret;
144
145         block_group_cache = &info->block_group_cache;
146         ret = find_first_extent_bit(block_group_cache,
147                                     bytenr, &start, &end,
148                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA);
149         if (ret) {
150                 return NULL;
151         }
152         ret = get_state_private(block_group_cache, start, &ptr);
153         if (ret)
154                 return NULL;
155
156         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
157         if (block_group->key.objectid <= bytenr && bytenr <
158             block_group->key.objectid + block_group->key.offset)
159                 return block_group;
160         return NULL;
161 }
162 static u64 find_search_start(struct btrfs_root *root,
163                              struct btrfs_block_group_cache **cache_ret,
164                              u64 search_start, int num,
165                              int data, int full_scan)
166 {
167         int ret;
168         struct btrfs_block_group_cache *cache = *cache_ret;
169         u64 last;
170         u64 start = 0;
171         u64 end = 0;
172         u64 cache_miss = 0;
173         int wrapped = 0;
174
175         if (!cache) {
176                 goto out;
177         }
178 again:
179         ret = cache_block_group(root, cache);
180         if (ret)
181                 goto out;
182
183         last = max(search_start, cache->key.objectid);
184
185         while(1) {
186                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
187                                             last, &start, &end, EXTENT_DIRTY);
188                 if (ret) {
189                         if (!cache_miss)
190                                 cache_miss = last;
191                         goto new_group;
192                 }
193
194                 start = max(last, start);
195                 last = end + 1;
196                 if (last - start < num) {
197                         if (last == cache->key.objectid + cache->key.offset)
198                                 cache_miss = start;
199                         continue;
200                 }
201                 if (data != BTRFS_BLOCK_GROUP_MIXED &&
202                     start + num > cache->key.objectid + cache->key.offset)
203                         goto new_group;
204                 return start;
205         }
206 out:
207         cache = btrfs_lookup_block_group(root->fs_info, search_start);
208         if (!cache) {
209                 printk("Unable to find block group for %Lu\n",
210                        search_start);
211                 WARN_ON(1);
212                 return search_start;
213         }
214         return search_start;
215
216 new_group:
217         last = cache->key.objectid + cache->key.offset;
218 wrapped:
219         cache = btrfs_lookup_block_group(root->fs_info, last);
220         if (!cache) {
221 no_cache:
222                 if (!wrapped) {
223                         wrapped = 1;
224                         last = search_start;
225                         data = BTRFS_BLOCK_GROUP_MIXED;
226                         goto wrapped;
227                 }
228                 goto out;
229         }
230         if (cache_miss && !cache->cached) {
231                 cache_block_group(root, cache);
232                 last = cache_miss;
233                 cache = btrfs_lookup_block_group(root->fs_info, last);
234         }
235         cache = btrfs_find_block_group(root, cache, last, data, 0);
236         if (!cache)
237                 goto no_cache;
238         *cache_ret = cache;
239         cache_miss = 0;
240         goto again;
241 }
242
243 static u64 div_factor(u64 num, int factor)
244 {
245         if (factor == 10)
246                 return num;
247         num *= factor;
248         do_div(num, 10);
249         return num;
250 }
251
252 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
253                                                  struct btrfs_block_group_cache
254                                                  *hint, u64 search_start,
255                                                  int data, int owner)
256 {
257         struct btrfs_block_group_cache *cache;
258         struct extent_map_tree *block_group_cache;
259         struct btrfs_block_group_cache *found_group = NULL;
260         struct btrfs_fs_info *info = root->fs_info;
261         u64 used;
262         u64 last = 0;
263         u64 hint_last;
264         u64 start;
265         u64 end;
266         u64 free_check;
267         u64 ptr;
268         int bit;
269         int ret;
270         int full_search = 0;
271         int factor = 8;
272         int data_swap = 0;
273
274         block_group_cache = &info->block_group_cache;
275
276         if (!owner)
277                 factor = 8;
278
279         if (data == BTRFS_BLOCK_GROUP_MIXED) {
280                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
281                 factor = 10;
282         } else if (data)
283                 bit = BLOCK_GROUP_DATA;
284         else
285                 bit = BLOCK_GROUP_METADATA;
286
287         if (search_start) {
288                 struct btrfs_block_group_cache *shint;
289                 shint = btrfs_lookup_block_group(info, search_start);
290                 if (shint && (shint->data == data ||
291                               shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
292                         used = btrfs_block_group_used(&shint->item);
293                         if (used + shint->pinned <
294                             div_factor(shint->key.offset, factor)) {
295                                 return shint;
296                         }
297                 }
298         }
299         if (hint && (hint->data == data ||
300                      hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
301                 used = btrfs_block_group_used(&hint->item);
302                 if (used + hint->pinned <
303                     div_factor(hint->key.offset, factor)) {
304                         return hint;
305                 }
306                 last = hint->key.objectid + hint->key.offset;
307                 hint_last = last;
308         } else {
309                 if (hint)
310                         hint_last = max(hint->key.objectid, search_start);
311                 else
312                         hint_last = search_start;
313
314                 last = hint_last;
315         }
316 again:
317         while(1) {
318                 ret = find_first_extent_bit(block_group_cache, last,
319                                             &start, &end, bit);
320                 if (ret)
321                         break;
322
323                 ret = get_state_private(block_group_cache, start, &ptr);
324                 if (ret)
325                         break;
326
327                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
328                 last = cache->key.objectid + cache->key.offset;
329                 used = btrfs_block_group_used(&cache->item);
330
331                 if (full_search)
332                         free_check = cache->key.offset;
333                 else
334                         free_check = div_factor(cache->key.offset, factor);
335                 if (used + cache->pinned < free_check) {
336                         found_group = cache;
337                         goto found;
338                 }
339                 cond_resched();
340         }
341         if (!full_search) {
342                 last = search_start;
343                 full_search = 1;
344                 goto again;
345         }
346         if (!data_swap) {
347                 data_swap = 1;
348                 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
349                 last = search_start;
350                 goto again;
351         }
352 found:
353         return found_group;
354 }
355
356 static u64 hash_extent_ref(u64 root_objectid, u64 root_generation,
357                            u64 owner, u64 owner_offset)
358 {
359         u32 high_crc = ~(u32)0;
360         u32 low_crc = ~(u32)0;
361         __le64 lenum;
362
363         lenum = cpu_to_le64(root_objectid);
364         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
365         lenum = cpu_to_le64(root_generation);
366         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
367
368         lenum = cpu_to_le64(owner);
369         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
370
371         lenum = cpu_to_le64(owner_offset);
372         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
373
374         return ((u64)high_crc << 32) | (u64)low_crc;
375 }
376
377 int insert_extent_ref(struct btrfs_trans_handle *trans,
378                                 struct btrfs_root *root,
379                                 struct btrfs_path *path,
380                                 u64 bytenr,
381                                 u64 root_objectid, u64 root_generation,
382                                 u64 owner, u64 owner_offset)
383 {
384         u64 hash;
385         struct btrfs_key key;
386         struct btrfs_extent_ref ref;
387         struct extent_buffer *l;
388         struct btrfs_extent_item *item;
389         int ret;
390
391         btrfs_set_stack_ref_root(&ref, root_objectid);
392         btrfs_set_stack_ref_generation(&ref, root_generation);
393         btrfs_set_stack_ref_objectid(&ref, owner);
394         btrfs_set_stack_ref_offset(&ref, owner_offset);
395
396         ret = btrfs_name_hash(&ref, sizeof(ref), &hash);
397         key.offset = hash;
398         key.objectid = bytenr;
399         key.type = BTRFS_EXTENT_REF_KEY;
400
401         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
402         while (ret == -EEXIST) {
403
404         }
405
406 }
407
408 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
409                                 struct btrfs_root *root,
410                                 u64 bytenr, u64 num_bytes,
411                                 u64 root_objectid, u64 root_generation,
412                                 u64 owner, u64 owner_offset)
413 {
414         struct btrfs_path *path;
415         int ret;
416         struct btrfs_key key;
417         struct extent_buffer *l;
418         struct btrfs_extent_item *item;
419         u32 refs;
420
421         WARN_ON(num_bytes < root->sectorsize);
422         path = btrfs_alloc_path();
423         if (!path)
424                 return -ENOMEM;
425
426         key.objectid = bytenr;
427         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
428         key.offset = num_bytes;
429         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
430                                 0, 1);
431         if (ret < 0)
432                 return ret;
433         if (ret != 0) {
434                 BUG();
435         }
436         BUG_ON(ret != 0);
437         l = path->nodes[0];
438         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
439         refs = btrfs_extent_refs(l, item);
440         btrfs_set_extent_refs(l, item, refs + 1);
441         btrfs_mark_buffer_dirty(path->nodes[0]);
442
443         btrfs_release_path(root->fs_info->extent_root, path);
444         finish_current_insert(trans, root->fs_info->extent_root);
445         del_pending_extents(trans, root->fs_info->extent_root);
446
447         btrfs_free_path(path);
448         return 0;
449 }
450
451 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
452                          struct btrfs_root *root)
453 {
454         finish_current_insert(trans, root->fs_info->extent_root);
455         del_pending_extents(trans, root->fs_info->extent_root);
456         return 0;
457 }
458
459 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
460                              struct btrfs_root *root, u64 bytenr,
461                              u64 num_bytes, u32 *refs)
462 {
463         struct btrfs_path *path;
464         int ret;
465         struct btrfs_key key;
466         struct extent_buffer *l;
467         struct btrfs_extent_item *item;
468
469         WARN_ON(num_bytes < root->sectorsize);
470         path = btrfs_alloc_path();
471         key.objectid = bytenr;
472         key.offset = num_bytes;
473         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
474         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
475                                 0, 0);
476         if (ret < 0)
477                 goto out;
478         if (ret != 0) {
479                 btrfs_print_leaf(root, path->nodes[0]);
480                 printk("failed to find block number %Lu\n", bytenr);
481                 BUG();
482         }
483         l = path->nodes[0];
484         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
485         *refs = btrfs_extent_refs(l, item);
486 out:
487         btrfs_free_path(path);
488         return 0;
489 }
490
491 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
492                        struct btrfs_root *root)
493 {
494         return btrfs_inc_extent_ref(trans, root, root->node->start,
495                                     root->node->len);
496 }
497
498 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
499                   struct extent_buffer *buf)
500 {
501         u64 bytenr;
502         u32 nritems;
503         struct btrfs_key key;
504         struct btrfs_file_extent_item *fi;
505         int i;
506         int level;
507         int ret;
508         int faili;
509         int err;
510
511         if (!root->ref_cows)
512                 return 0;
513
514         level = btrfs_header_level(buf);
515         nritems = btrfs_header_nritems(buf);
516         for (i = 0; i < nritems; i++) {
517                 if (level == 0) {
518                         u64 disk_bytenr;
519                         btrfs_item_key_to_cpu(buf, &key, i);
520                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
521                                 continue;
522                         fi = btrfs_item_ptr(buf, i,
523                                             struct btrfs_file_extent_item);
524                         if (btrfs_file_extent_type(buf, fi) ==
525                             BTRFS_FILE_EXTENT_INLINE)
526                                 continue;
527                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
528                         if (disk_bytenr == 0)
529                                 continue;
530                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
531                                     btrfs_file_extent_disk_num_bytes(buf, fi));
532                         if (ret) {
533                                 faili = i;
534                                 goto fail;
535                         }
536                 } else {
537                         bytenr = btrfs_node_blockptr(buf, i);
538                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
539                                            btrfs_level_size(root, level - 1));
540                         if (ret) {
541                                 faili = i;
542                                 goto fail;
543                         }
544                 }
545         }
546         return 0;
547 fail:
548         WARN_ON(1);
549         for (i =0; i < faili; i++) {
550                 if (level == 0) {
551                         u64 disk_bytenr;
552                         btrfs_item_key_to_cpu(buf, &key, i);
553                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
554                                 continue;
555                         fi = btrfs_item_ptr(buf, i,
556                                             struct btrfs_file_extent_item);
557                         if (btrfs_file_extent_type(buf, fi) ==
558                             BTRFS_FILE_EXTENT_INLINE)
559                                 continue;
560                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
561                         if (disk_bytenr == 0)
562                                 continue;
563                         err = btrfs_free_extent(trans, root, disk_bytenr,
564                                     btrfs_file_extent_disk_num_bytes(buf,
565                                                                       fi), 0);
566                         BUG_ON(err);
567                 } else {
568                         bytenr = btrfs_node_blockptr(buf, i);
569                         err = btrfs_free_extent(trans, root, bytenr,
570                                         btrfs_level_size(root, level - 1), 0);
571                         BUG_ON(err);
572                 }
573         }
574         return ret;
575 }
576
577 static int write_one_cache_group(struct btrfs_trans_handle *trans,
578                                  struct btrfs_root *root,
579                                  struct btrfs_path *path,
580                                  struct btrfs_block_group_cache *cache)
581 {
582         int ret;
583         int pending_ret;
584         struct btrfs_root *extent_root = root->fs_info->extent_root;
585         unsigned long bi;
586         struct extent_buffer *leaf;
587
588         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
589         if (ret < 0)
590                 goto fail;
591         BUG_ON(ret);
592
593         leaf = path->nodes[0];
594         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
595         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
596         btrfs_mark_buffer_dirty(leaf);
597         btrfs_release_path(extent_root, path);
598 fail:
599         finish_current_insert(trans, extent_root);
600         pending_ret = del_pending_extents(trans, extent_root);
601         if (ret)
602                 return ret;
603         if (pending_ret)
604                 return pending_ret;
605         return 0;
606
607 }
608
609 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
610                                    struct btrfs_root *root)
611 {
612         struct extent_map_tree *block_group_cache;
613         struct btrfs_block_group_cache *cache;
614         int ret;
615         int err = 0;
616         int werr = 0;
617         struct btrfs_path *path;
618         u64 last = 0;
619         u64 start;
620         u64 end;
621         u64 ptr;
622
623         block_group_cache = &root->fs_info->block_group_cache;
624         path = btrfs_alloc_path();
625         if (!path)
626                 return -ENOMEM;
627
628         while(1) {
629                 ret = find_first_extent_bit(block_group_cache, last,
630                                             &start, &end, BLOCK_GROUP_DIRTY);
631                 if (ret)
632                         break;
633
634                 last = end + 1;
635                 ret = get_state_private(block_group_cache, start, &ptr);
636                 if (ret)
637                         break;
638
639                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
640                 err = write_one_cache_group(trans, root,
641                                             path, cache);
642                 /*
643                  * if we fail to write the cache group, we want
644                  * to keep it marked dirty in hopes that a later
645                  * write will work
646                  */
647                 if (err) {
648                         werr = err;
649                         continue;
650                 }
651                 clear_extent_bits(block_group_cache, start, end,
652                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
653         }
654         btrfs_free_path(path);
655         return werr;
656 }
657
658 static int update_block_group(struct btrfs_trans_handle *trans,
659                               struct btrfs_root *root,
660                               u64 bytenr, u64 num_bytes, int alloc,
661                               int mark_free, int data)
662 {
663         struct btrfs_block_group_cache *cache;
664         struct btrfs_fs_info *info = root->fs_info;
665         u64 total = num_bytes;
666         u64 old_val;
667         u64 byte_in_group;
668         u64 start;
669         u64 end;
670
671         while(total) {
672                 cache = btrfs_lookup_block_group(info, bytenr);
673                 if (!cache) {
674                         return -1;
675                 }
676                 byte_in_group = bytenr - cache->key.objectid;
677                 WARN_ON(byte_in_group > cache->key.offset);
678                 start = cache->key.objectid;
679                 end = start + cache->key.offset - 1;
680                 set_extent_bits(&info->block_group_cache, start, end,
681                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
682
683                 old_val = btrfs_block_group_used(&cache->item);
684                 num_bytes = min(total, cache->key.offset - byte_in_group);
685                 if (alloc) {
686                         if (cache->data != data &&
687                             old_val < (cache->key.offset >> 1)) {
688                                 int bit_to_clear;
689                                 int bit_to_set;
690                                 cache->data = data;
691                                 if (data) {
692                                         bit_to_clear = BLOCK_GROUP_METADATA;
693                                         bit_to_set = BLOCK_GROUP_DATA;
694                                         cache->item.flags &=
695                                                 ~BTRFS_BLOCK_GROUP_MIXED;
696                                         cache->item.flags |=
697                                                 BTRFS_BLOCK_GROUP_DATA;
698                                 } else {
699                                         bit_to_clear = BLOCK_GROUP_DATA;
700                                         bit_to_set = BLOCK_GROUP_METADATA;
701                                         cache->item.flags &=
702                                                 ~BTRFS_BLOCK_GROUP_MIXED;
703                                         cache->item.flags &=
704                                                 ~BTRFS_BLOCK_GROUP_DATA;
705                                 }
706                                 clear_extent_bits(&info->block_group_cache,
707                                                   start, end, bit_to_clear,
708                                                   GFP_NOFS);
709                                 set_extent_bits(&info->block_group_cache,
710                                                 start, end, bit_to_set,
711                                                 GFP_NOFS);
712                         } else if (cache->data != data &&
713                                    cache->data != BTRFS_BLOCK_GROUP_MIXED) {
714                                 cache->data = BTRFS_BLOCK_GROUP_MIXED;
715                                 set_extent_bits(&info->block_group_cache,
716                                                 start, end,
717                                                 BLOCK_GROUP_DATA |
718                                                 BLOCK_GROUP_METADATA,
719                                                 GFP_NOFS);
720                         }
721                         old_val += num_bytes;
722                 } else {
723                         old_val -= num_bytes;
724                         if (mark_free) {
725                                 set_extent_dirty(&info->free_space_cache,
726                                                  bytenr, bytenr + num_bytes - 1,
727                                                  GFP_NOFS);
728                         }
729                 }
730                 btrfs_set_block_group_used(&cache->item, old_val);
731                 total -= num_bytes;
732                 bytenr += num_bytes;
733         }
734         return 0;
735 }
736 static int update_pinned_extents(struct btrfs_root *root,
737                                 u64 bytenr, u64 num, int pin)
738 {
739         u64 len;
740         struct btrfs_block_group_cache *cache;
741         struct btrfs_fs_info *fs_info = root->fs_info;
742
743         if (pin) {
744                 set_extent_dirty(&fs_info->pinned_extents,
745                                 bytenr, bytenr + num - 1, GFP_NOFS);
746         } else {
747                 clear_extent_dirty(&fs_info->pinned_extents,
748                                 bytenr, bytenr + num - 1, GFP_NOFS);
749         }
750         while (num > 0) {
751                 cache = btrfs_lookup_block_group(fs_info, bytenr);
752                 WARN_ON(!cache);
753                 len = min(num, cache->key.offset -
754                           (bytenr - cache->key.objectid));
755                 if (pin) {
756                         cache->pinned += len;
757                         fs_info->total_pinned += len;
758                 } else {
759                         cache->pinned -= len;
760                         fs_info->total_pinned -= len;
761                 }
762                 bytenr += len;
763                 num -= len;
764         }
765         return 0;
766 }
767
768 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
769 {
770         u64 last = 0;
771         u64 start;
772         u64 end;
773         struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
774         int ret;
775
776         while(1) {
777                 ret = find_first_extent_bit(pinned_extents, last,
778                                             &start, &end, EXTENT_DIRTY);
779                 if (ret)
780                         break;
781                 set_extent_dirty(copy, start, end, GFP_NOFS);
782                 last = end + 1;
783         }
784         return 0;
785 }
786
787 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
788                                struct btrfs_root *root,
789                                struct extent_map_tree *unpin)
790 {
791         u64 start;
792         u64 end;
793         int ret;
794         struct extent_map_tree *free_space_cache;
795         free_space_cache = &root->fs_info->free_space_cache;
796
797         while(1) {
798                 ret = find_first_extent_bit(unpin, 0, &start, &end,
799                                             EXTENT_DIRTY);
800                 if (ret)
801                         break;
802                 update_pinned_extents(root, start, end + 1 - start, 0);
803                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
804                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
805         }
806         return 0;
807 }
808
809 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
810                                  btrfs_root *extent_root)
811 {
812         struct btrfs_key ins;
813         struct btrfs_extent_item extent_item;
814         int ret;
815         int err = 0;
816         u64 start;
817         u64 end;
818         struct btrfs_fs_info *info = extent_root->fs_info;
819
820         btrfs_set_stack_extent_refs(&extent_item, 1);
821         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
822         btrfs_set_stack_extent_owner(&extent_item,
823                                      extent_root->root_key.objectid);
824
825         while(1) {
826                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
827                                             &end, EXTENT_LOCKED);
828                 if (ret)
829                         break;
830
831                 ins.objectid = start;
832                 ins.offset = end + 1 - start;
833                 err = btrfs_insert_item(trans, extent_root, &ins,
834                                         &extent_item, sizeof(extent_item));
835                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
836                                   GFP_NOFS);
837         }
838         return 0;
839 }
840
841 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
842                           int pending)
843 {
844         int err = 0;
845         struct extent_buffer *buf;
846
847         if (!pending) {
848                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
849                 if (buf) {
850                         if (btrfs_buffer_uptodate(buf)) {
851                                 u64 transid =
852                                     root->fs_info->running_transaction->transid;
853                                 if (btrfs_header_generation(buf) == transid) {
854                                         free_extent_buffer(buf);
855                                         return 1;
856                                 }
857                         }
858                         free_extent_buffer(buf);
859                 }
860                 update_pinned_extents(root, bytenr, num_bytes, 1);
861         } else {
862                 set_extent_bits(&root->fs_info->pending_del,
863                                 bytenr, bytenr + num_bytes - 1,
864                                 EXTENT_LOCKED, GFP_NOFS);
865         }
866         BUG_ON(err < 0);
867         return 0;
868 }
869
870 /*
871  * remove an extent from the root, returns 0 on success
872  */
873 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
874                          *root, u64 bytenr, u64 num_bytes, int pin,
875                          int mark_free)
876 {
877         struct btrfs_path *path;
878         struct btrfs_key key;
879         struct btrfs_fs_info *info = root->fs_info;
880         struct btrfs_root *extent_root = info->extent_root;
881         struct extent_buffer *leaf;
882         int ret;
883         struct btrfs_extent_item *ei;
884         u32 refs;
885
886         key.objectid = bytenr;
887         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
888         key.offset = num_bytes;
889
890         path = btrfs_alloc_path();
891         if (!path)
892                 return -ENOMEM;
893
894         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
895         if (ret < 0)
896                 return ret;
897         BUG_ON(ret);
898
899         leaf = path->nodes[0];
900         ei = btrfs_item_ptr(leaf, path->slots[0],
901                             struct btrfs_extent_item);
902         refs = btrfs_extent_refs(leaf, ei);
903         BUG_ON(refs == 0);
904         refs -= 1;
905         btrfs_set_extent_refs(leaf, ei, refs);
906         btrfs_mark_buffer_dirty(leaf);
907
908         if (refs == 0) {
909                 u64 super_used;
910                 u64 root_used;
911
912                 if (pin) {
913                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
914                         if (ret > 0)
915                                 mark_free = 1;
916                         BUG_ON(ret < 0);
917                 }
918
919                 /* block accounting for super block */
920                 super_used = btrfs_super_bytes_used(&info->super_copy);
921                 btrfs_set_super_bytes_used(&info->super_copy,
922                                            super_used - num_bytes);
923
924                 /* block accounting for root item */
925                 root_used = btrfs_root_used(&root->root_item);
926                 btrfs_set_root_used(&root->root_item,
927                                            root_used - num_bytes);
928
929                 ret = btrfs_del_item(trans, extent_root, path);
930                 if (ret) {
931                         return ret;
932                 }
933                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
934                                          mark_free, 0);
935                 BUG_ON(ret);
936         }
937         btrfs_free_path(path);
938         finish_current_insert(trans, extent_root);
939         return ret;
940 }
941
942 /*
943  * find all the blocks marked as pending in the radix tree and remove
944  * them from the extent map
945  */
946 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
947                                btrfs_root *extent_root)
948 {
949         int ret;
950         int err = 0;
951         u64 start;
952         u64 end;
953         struct extent_map_tree *pending_del;
954         struct extent_map_tree *pinned_extents;
955
956         pending_del = &extent_root->fs_info->pending_del;
957         pinned_extents = &extent_root->fs_info->pinned_extents;
958
959         while(1) {
960                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
961                                             EXTENT_LOCKED);
962                 if (ret)
963                         break;
964                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
965                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
966                                   GFP_NOFS);
967                 ret = __free_extent(trans, extent_root,
968                                      start, end + 1 - start, 0, 0);
969                 if (ret)
970                         err = ret;
971         }
972         return err;
973 }
974
975 /*
976  * remove an extent from the root, returns 0 on success
977  */
978 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
979                       *root, u64 bytenr, u64 num_bytes, int pin)
980 {
981         struct btrfs_root *extent_root = root->fs_info->extent_root;
982         int pending_ret;
983         int ret;
984
985         WARN_ON(num_bytes < root->sectorsize);
986         if (root == extent_root) {
987                 pin_down_bytes(root, bytenr, num_bytes, 1);
988                 return 0;
989         }
990         ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0);
991         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
992         return ret ? ret : pending_ret;
993 }
994
995 static u64 stripe_align(struct btrfs_root *root, u64 val)
996 {
997         u64 mask = ((u64)root->stripesize - 1);
998         u64 ret = (val + mask) & ~mask;
999         return ret;
1000 }
1001
1002 /*
1003  * walks the btree of allocated extents and find a hole of a given size.
1004  * The key ins is changed to record the hole:
1005  * ins->objectid == block start
1006  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1007  * ins->offset == number of blocks
1008  * Any available blocks before search_start are skipped.
1009  */
1010 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1011                             *orig_root, u64 num_bytes, u64 empty_size,
1012                             u64 search_start, u64 search_end, u64 hint_byte,
1013                             struct btrfs_key *ins, u64 exclude_start,
1014                             u64 exclude_nr, int data)
1015 {
1016         struct btrfs_path *path;
1017         struct btrfs_key key;
1018         u64 hole_size = 0;
1019         u64 aligned;
1020         int ret;
1021         int slot = 0;
1022         u64 last_byte = 0;
1023         u64 orig_search_start = search_start;
1024         int start_found;
1025         struct extent_buffer *l;
1026         struct btrfs_root * root = orig_root->fs_info->extent_root;
1027         struct btrfs_fs_info *info = root->fs_info;
1028         u64 total_needed = num_bytes;
1029         int level;
1030         struct btrfs_block_group_cache *block_group;
1031         int full_scan = 0;
1032         int wrapped = 0;
1033         u64 cached_start;
1034
1035         WARN_ON(num_bytes < root->sectorsize);
1036         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1037
1038         level = btrfs_header_level(root->node);
1039
1040         if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
1041                 data = BTRFS_BLOCK_GROUP_MIXED;
1042         }
1043
1044         if (search_end == (u64)-1)
1045                 search_end = btrfs_super_total_bytes(&info->super_copy);
1046         if (hint_byte) {
1047                 block_group = btrfs_lookup_block_group(info, hint_byte);
1048                 if (!block_group)
1049                         hint_byte = search_start;
1050                 block_group = btrfs_find_block_group(root, block_group,
1051                                                      hint_byte, data, 1);
1052         } else {
1053                 block_group = btrfs_find_block_group(root,
1054                                                      trans->block_group,
1055                                                      search_start, data, 1);
1056         }
1057
1058         total_needed += empty_size;
1059         path = btrfs_alloc_path();
1060 check_failed:
1061         search_start = find_search_start(root, &block_group, search_start,
1062                                          total_needed, data, full_scan);
1063         search_start = stripe_align(root, search_start);
1064         cached_start = search_start;
1065         btrfs_init_path(path);
1066         ins->objectid = search_start;
1067         ins->offset = 0;
1068         start_found = 0;
1069         path->reada = 2;
1070
1071         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1072         if (ret < 0)
1073                 goto error;
1074
1075         if (path->slots[0] > 0) {
1076                 path->slots[0]--;
1077         }
1078
1079         l = path->nodes[0];
1080         btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1081
1082         /*
1083          * a rare case, go back one key if we hit a block group item
1084          * instead of an extent item
1085          */
1086         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1087             key.objectid + key.offset >= search_start) {
1088                 ins->objectid = key.objectid;
1089                 ins->offset = key.offset - 1;
1090                 btrfs_release_path(root, path);
1091                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1092                 if (ret < 0)
1093                         goto error;
1094
1095                 if (path->slots[0] > 0) {
1096                         path->slots[0]--;
1097                 }
1098         }
1099
1100         while (1) {
1101                 l = path->nodes[0];
1102                 slot = path->slots[0];
1103                 if (slot >= btrfs_header_nritems(l)) {
1104                         ret = btrfs_next_leaf(root, path);
1105                         if (ret == 0)
1106                                 continue;
1107                         if (ret < 0)
1108                                 goto error;
1109
1110                         search_start = max(search_start,
1111                                            block_group->key.objectid);
1112                         if (!start_found) {
1113                                 aligned = stripe_align(root, search_start);
1114                                 ins->objectid = aligned;
1115                                 if (aligned >= search_end) {
1116                                         ret = -ENOSPC;
1117                                         goto error;
1118                                 }
1119                                 ins->offset = search_end - aligned;
1120                                 start_found = 1;
1121                                 goto check_pending;
1122                         }
1123                         ins->objectid = stripe_align(root,
1124                                                      last_byte > search_start ?
1125                                                      last_byte : search_start);
1126                         if (search_end <= ins->objectid) {
1127                                 ret = -ENOSPC;
1128                                 goto error;
1129                         }
1130                         ins->offset = search_end - ins->objectid;
1131                         BUG_ON(ins->objectid >= search_end);
1132                         goto check_pending;
1133                 }
1134                 btrfs_item_key_to_cpu(l, &key, slot);
1135
1136                 if (key.objectid >= search_start && key.objectid > last_byte &&
1137                     start_found) {
1138                         if (last_byte < search_start)
1139                                 last_byte = search_start;
1140                         aligned = stripe_align(root, last_byte);
1141                         hole_size = key.objectid - aligned;
1142                         if (key.objectid > aligned && hole_size >= num_bytes) {
1143                                 ins->objectid = aligned;
1144                                 ins->offset = hole_size;
1145                                 goto check_pending;
1146                         }
1147                 }
1148                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
1149                         if (!start_found) {
1150                                 last_byte = key.objectid;
1151                                 start_found = 1;
1152                         }
1153                         goto next;
1154                 }
1155
1156
1157                 start_found = 1;
1158                 last_byte = key.objectid + key.offset;
1159
1160                 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1161                     last_byte >= block_group->key.objectid +
1162                     block_group->key.offset) {
1163                         btrfs_release_path(root, path);
1164                         search_start = block_group->key.objectid +
1165                                 block_group->key.offset;
1166                         goto new_group;
1167                 }
1168 next:
1169                 path->slots[0]++;
1170                 cond_resched();
1171         }
1172 check_pending:
1173         /* we have to make sure we didn't find an extent that has already
1174          * been allocated by the map tree or the original allocation
1175          */
1176         btrfs_release_path(root, path);
1177         BUG_ON(ins->objectid < search_start);
1178
1179         if (ins->objectid + num_bytes >= search_end)
1180                 goto enospc;
1181         if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
1182             ins->objectid + num_bytes > block_group->
1183             key.objectid + block_group->key.offset) {
1184                 search_start = block_group->key.objectid +
1185                         block_group->key.offset;
1186                 goto new_group;
1187         }
1188         if (test_range_bit(&info->extent_ins, ins->objectid,
1189                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1190                 search_start = ins->objectid + num_bytes;
1191                 goto new_group;
1192         }
1193         if (test_range_bit(&info->pinned_extents, ins->objectid,
1194                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1195                 search_start = ins->objectid + num_bytes;
1196                 goto new_group;
1197         }
1198         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1199             ins->objectid < exclude_start + exclude_nr)) {
1200                 search_start = exclude_start + exclude_nr;
1201                 goto new_group;
1202         }
1203         if (!data) {
1204                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1205                 if (block_group)
1206                         trans->block_group = block_group;
1207         }
1208         ins->offset = num_bytes;
1209         btrfs_free_path(path);
1210         return 0;
1211
1212 new_group:
1213         if (search_start + num_bytes >= search_end) {
1214 enospc:
1215                 search_start = orig_search_start;
1216                 if (full_scan) {
1217                         ret = -ENOSPC;
1218                         goto error;
1219                 }
1220                 if (wrapped) {
1221                         if (!full_scan)
1222                                 total_needed -= empty_size;
1223                         full_scan = 1;
1224                         data = BTRFS_BLOCK_GROUP_MIXED;
1225                 } else
1226                         wrapped = 1;
1227         }
1228         block_group = btrfs_lookup_block_group(info, search_start);
1229         cond_resched();
1230         block_group = btrfs_find_block_group(root, block_group,
1231                                              search_start, data, 0);
1232         goto check_failed;
1233
1234 error:
1235         btrfs_release_path(root, path);
1236         btrfs_free_path(path);
1237         return ret;
1238 }
1239 /*
1240  * finds a free extent and does all the dirty work required for allocation
1241  * returns the key for the extent through ins, and a tree buffer for
1242  * the first block of the extent through buf.
1243  *
1244  * returns 0 if everything worked, non-zero otherwise.
1245  */
1246 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1247                        struct btrfs_root *root, u64 owner,
1248                        u64 num_bytes, u64 empty_size, u64 hint_byte,
1249                        u64 search_end, struct btrfs_key *ins, int data)
1250 {
1251         int ret;
1252         int pending_ret;
1253         u64 super_used, root_used;
1254         u64 search_start = 0;
1255         struct btrfs_fs_info *info = root->fs_info;
1256         struct btrfs_root *extent_root = info->extent_root;
1257         struct btrfs_extent_item extent_item;
1258
1259         btrfs_set_stack_extent_refs(&extent_item, 1);
1260         btrfs_set_stack_extent_owner(&extent_item, owner);
1261
1262         WARN_ON(num_bytes < root->sectorsize);
1263         ret = find_free_extent(trans, root, num_bytes, empty_size,
1264                                search_start, search_end, hint_byte, ins,
1265                                trans->alloc_exclude_start,
1266                                trans->alloc_exclude_nr, data);
1267         BUG_ON(ret);
1268         if (ret)
1269                 return ret;
1270
1271         /* block accounting for super block */
1272         super_used = btrfs_super_bytes_used(&info->super_copy);
1273         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1274
1275         /* block accounting for root item */
1276         root_used = btrfs_root_used(&root->root_item);
1277         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1278
1279         clear_extent_dirty(&root->fs_info->free_space_cache,
1280                            ins->objectid, ins->objectid + ins->offset - 1,
1281                            GFP_NOFS);
1282
1283         if (root == extent_root) {
1284                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1285                                 ins->objectid + ins->offset - 1,
1286                                 EXTENT_LOCKED, GFP_NOFS);
1287                 WARN_ON(data == 1);
1288                 goto update_block;
1289         }
1290
1291         WARN_ON(trans->alloc_exclude_nr);
1292         trans->alloc_exclude_start = ins->objectid;
1293         trans->alloc_exclude_nr = ins->offset;
1294         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1295                                 sizeof(extent_item));
1296
1297         trans->alloc_exclude_start = 0;
1298         trans->alloc_exclude_nr = 0;
1299
1300         BUG_ON(ret);
1301         finish_current_insert(trans, extent_root);
1302         pending_ret = del_pending_extents(trans, extent_root);
1303
1304         if (ret) {
1305                 return ret;
1306         }
1307         if (pending_ret) {
1308                 return pending_ret;
1309         }
1310
1311 update_block:
1312         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1313                                  data);
1314         BUG_ON(ret);
1315         return 0;
1316 }
1317
1318 /*
1319  * helper function to allocate a block for a given tree
1320  * returns the tree buffer or NULL.
1321  */
1322 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1323                                              struct btrfs_root *root,
1324                                              u32 blocksize, u64 hint,
1325                                              u64 empty_size)
1326 {
1327         struct btrfs_key ins;
1328         int ret;
1329         struct extent_buffer *buf;
1330
1331         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1332                                  blocksize, empty_size, hint,
1333                                  (u64)-1, &ins, 0);
1334         if (ret) {
1335                 BUG_ON(ret > 0);
1336                 return ERR_PTR(ret);
1337         }
1338         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1339         if (!buf) {
1340                 btrfs_free_extent(trans, root, ins.objectid, blocksize, 0);
1341                 return ERR_PTR(-ENOMEM);
1342         }
1343         btrfs_set_buffer_uptodate(buf);
1344         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1345                          buf->start + buf->len - 1, GFP_NOFS);
1346         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
1347                         buf->start, buf->start + buf->len - 1,
1348                         EXTENT_CSUM, GFP_NOFS);
1349         buf->flags |= EXTENT_CSUM;
1350         btrfs_set_buffer_defrag(buf);
1351         trans->blocks_used++;
1352         return buf;
1353 }
1354
1355 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1356                          struct btrfs_root *root, struct extent_buffer *leaf)
1357 {
1358         struct btrfs_key key;
1359         struct btrfs_file_extent_item *fi;
1360         int i;
1361         int nritems;
1362         int ret;
1363
1364         BUG_ON(!btrfs_is_leaf(leaf));
1365         nritems = btrfs_header_nritems(leaf);
1366         for (i = 0; i < nritems; i++) {
1367                 u64 disk_bytenr;
1368
1369                 btrfs_item_key_to_cpu(leaf, &key, i);
1370                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1371                         continue;
1372                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1373                 if (btrfs_file_extent_type(leaf, fi) ==
1374                     BTRFS_FILE_EXTENT_INLINE)
1375                         continue;
1376                 /*
1377                  * FIXME make sure to insert a trans record that
1378                  * repeats the snapshot del on crash
1379                  */
1380                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1381                 if (disk_bytenr == 0)
1382                         continue;
1383                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1384                                 btrfs_file_extent_disk_num_bytes(leaf, fi), 0);
1385                 BUG_ON(ret);
1386         }
1387         return 0;
1388 }
1389
1390 static void reada_walk_down(struct btrfs_root *root,
1391                             struct extent_buffer *node)
1392 {
1393         int i;
1394         u32 nritems;
1395         u64 bytenr;
1396         int ret;
1397         u32 refs;
1398         int level;
1399         u32 blocksize;
1400
1401         nritems = btrfs_header_nritems(node);
1402         level = btrfs_header_level(node);
1403         for (i = 0; i < nritems; i++) {
1404                 bytenr = btrfs_node_blockptr(node, i);
1405                 blocksize = btrfs_level_size(root, level - 1);
1406                 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
1407                 BUG_ON(ret);
1408                 if (refs != 1)
1409                         continue;
1410                 mutex_unlock(&root->fs_info->fs_mutex);
1411                 ret = readahead_tree_block(root, bytenr, blocksize);
1412                 cond_resched();
1413                 mutex_lock(&root->fs_info->fs_mutex);
1414                 if (ret)
1415                         break;
1416         }
1417 }
1418
1419 /*
1420  * helper function for drop_snapshot, this walks down the tree dropping ref
1421  * counts as it goes.
1422  */
1423 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1424                           *root, struct btrfs_path *path, int *level)
1425 {
1426         struct extent_buffer *next;
1427         struct extent_buffer *cur;
1428         u64 bytenr;
1429         u32 blocksize;
1430         int ret;
1431         u32 refs;
1432
1433         WARN_ON(*level < 0);
1434         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1435         ret = lookup_extent_ref(trans, root,
1436                                 path->nodes[*level]->start,
1437                                 path->nodes[*level]->len, &refs);
1438         BUG_ON(ret);
1439         if (refs > 1)
1440                 goto out;
1441
1442         /*
1443          * walk down to the last node level and free all the leaves
1444          */
1445         while(*level >= 0) {
1446                 WARN_ON(*level < 0);
1447                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1448                 cur = path->nodes[*level];
1449
1450                 if (*level > 0 && path->slots[*level] == 0)
1451                         reada_walk_down(root, cur);
1452
1453                 if (btrfs_header_level(cur) != *level)
1454                         WARN_ON(1);
1455
1456                 if (path->slots[*level] >=
1457                     btrfs_header_nritems(cur))
1458                         break;
1459                 if (*level == 0) {
1460                         ret = drop_leaf_ref(trans, root, cur);
1461                         BUG_ON(ret);
1462                         break;
1463                 }
1464                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1465                 blocksize = btrfs_level_size(root, *level - 1);
1466                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1467                 BUG_ON(ret);
1468                 if (refs != 1) {
1469                         path->slots[*level]++;
1470                         ret = btrfs_free_extent(trans, root, bytenr,
1471                                                 blocksize, 1);
1472                         BUG_ON(ret);
1473                         continue;
1474                 }
1475                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1476                 if (!next || !btrfs_buffer_uptodate(next)) {
1477                         free_extent_buffer(next);
1478                         mutex_unlock(&root->fs_info->fs_mutex);
1479                         next = read_tree_block(root, bytenr, blocksize);
1480                         mutex_lock(&root->fs_info->fs_mutex);
1481
1482                         /* we dropped the lock, check one more time */
1483                         ret = lookup_extent_ref(trans, root, bytenr,
1484                                                 blocksize, &refs);
1485                         BUG_ON(ret);
1486                         if (refs != 1) {
1487                                 path->slots[*level]++;
1488                                 free_extent_buffer(next);
1489                                 ret = btrfs_free_extent(trans, root,
1490                                                         bytenr, blocksize, 1);
1491                                 BUG_ON(ret);
1492                                 continue;
1493                         }
1494                 }
1495                 WARN_ON(*level <= 0);
1496                 if (path->nodes[*level-1])
1497                         free_extent_buffer(path->nodes[*level-1]);
1498                 path->nodes[*level-1] = next;
1499                 *level = btrfs_header_level(next);
1500                 path->slots[*level] = 0;
1501         }
1502 out:
1503         WARN_ON(*level < 0);
1504         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1505         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1506                                 path->nodes[*level]->len, 1);
1507         free_extent_buffer(path->nodes[*level]);
1508         path->nodes[*level] = NULL;
1509         *level += 1;
1510         BUG_ON(ret);
1511         return 0;
1512 }
1513
1514 /*
1515  * helper for dropping snapshots.  This walks back up the tree in the path
1516  * to find the first node higher up where we haven't yet gone through
1517  * all the slots
1518  */
1519 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1520                         *root, struct btrfs_path *path, int *level)
1521 {
1522         int i;
1523         int slot;
1524         int ret;
1525         struct btrfs_root_item *root_item = &root->root_item;
1526
1527         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1528                 slot = path->slots[i];
1529                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
1530                         struct extent_buffer *node;
1531                         struct btrfs_disk_key disk_key;
1532                         node = path->nodes[i];
1533                         path->slots[i]++;
1534                         *level = i;
1535                         WARN_ON(*level == 0);
1536                         btrfs_node_key(node, &disk_key, path->slots[i]);
1537                         memcpy(&root_item->drop_progress,
1538                                &disk_key, sizeof(disk_key));
1539                         root_item->drop_level = i;
1540                         return 0;
1541                 } else {
1542                         ret = btrfs_free_extent(trans, root,
1543                                                 path->nodes[*level]->start,
1544                                                 path->nodes[*level]->len, 1);
1545                         BUG_ON(ret);
1546                         free_extent_buffer(path->nodes[*level]);
1547                         path->nodes[*level] = NULL;
1548                         *level = i + 1;
1549                 }
1550         }
1551         return 1;
1552 }
1553
1554 /*
1555  * drop the reference count on the tree rooted at 'snap'.  This traverses
1556  * the tree freeing any blocks that have a ref count of zero after being
1557  * decremented.
1558  */
1559 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1560                         *root)
1561 {
1562         int ret = 0;
1563         int wret;
1564         int level;
1565         struct btrfs_path *path;
1566         int i;
1567         int orig_level;
1568         struct btrfs_root_item *root_item = &root->root_item;
1569
1570         path = btrfs_alloc_path();
1571         BUG_ON(!path);
1572
1573         level = btrfs_header_level(root->node);
1574         orig_level = level;
1575         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1576                 path->nodes[level] = root->node;
1577                 extent_buffer_get(root->node);
1578                 path->slots[level] = 0;
1579         } else {
1580                 struct btrfs_key key;
1581                 struct btrfs_disk_key found_key;
1582                 struct extent_buffer *node;
1583
1584                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1585                 level = root_item->drop_level;
1586                 path->lowest_level = level;
1587                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1588                 if (wret < 0) {
1589                         ret = wret;
1590                         goto out;
1591                 }
1592                 node = path->nodes[level];
1593                 btrfs_node_key(node, &found_key, path->slots[level]);
1594                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
1595                                sizeof(found_key)));
1596         }
1597         while(1) {
1598                 wret = walk_down_tree(trans, root, path, &level);
1599                 if (wret > 0)
1600                         break;
1601                 if (wret < 0)
1602                         ret = wret;
1603
1604                 wret = walk_up_tree(trans, root, path, &level);
1605                 if (wret > 0)
1606                         break;
1607                 if (wret < 0)
1608                         ret = wret;
1609                 ret = -EAGAIN;
1610                 break;
1611         }
1612         for (i = 0; i <= orig_level; i++) {
1613                 if (path->nodes[i]) {
1614                         free_extent_buffer(path->nodes[i]);
1615                         path->nodes[i] = NULL;
1616                 }
1617         }
1618 out:
1619         btrfs_free_path(path);
1620         return ret;
1621 }
1622
1623 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1624 {
1625         u64 start;
1626         u64 end;
1627         u64 ptr;
1628         int ret;
1629         while(1) {
1630                 ret = find_first_extent_bit(&info->block_group_cache, 0,
1631                                             &start, &end, (unsigned int)-1);
1632                 if (ret)
1633                         break;
1634                 ret = get_state_private(&info->block_group_cache, start, &ptr);
1635                 if (!ret)
1636                         kfree((void *)(unsigned long)ptr);
1637                 clear_extent_bits(&info->block_group_cache, start,
1638                                   end, (unsigned int)-1, GFP_NOFS);
1639         }
1640         while(1) {
1641                 ret = find_first_extent_bit(&info->free_space_cache, 0,
1642                                             &start, &end, EXTENT_DIRTY);
1643                 if (ret)
1644                         break;
1645                 clear_extent_dirty(&info->free_space_cache, start,
1646                                    end, GFP_NOFS);
1647         }
1648         return 0;
1649 }
1650
1651 int btrfs_read_block_groups(struct btrfs_root *root)
1652 {
1653         struct btrfs_path *path;
1654         int ret;
1655         int err = 0;
1656         int bit;
1657         struct btrfs_block_group_cache *cache;
1658         struct btrfs_fs_info *info = root->fs_info;
1659         struct extent_map_tree *block_group_cache;
1660         struct btrfs_key key;
1661         struct btrfs_key found_key;
1662         struct extent_buffer *leaf;
1663
1664         block_group_cache = &info->block_group_cache;
1665
1666         root = info->extent_root;
1667         key.objectid = 0;
1668         key.offset = BTRFS_BLOCK_GROUP_SIZE;
1669         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1670
1671         path = btrfs_alloc_path();
1672         if (!path)
1673                 return -ENOMEM;
1674
1675         while(1) {
1676                 ret = btrfs_search_slot(NULL, info->extent_root,
1677                                         &key, path, 0, 0);
1678                 if (ret != 0) {
1679                         err = ret;
1680                         break;
1681                 }
1682                 leaf = path->nodes[0];
1683                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1684                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1685                 if (!cache) {
1686                         err = -1;
1687                         break;
1688                 }
1689
1690                 read_extent_buffer(leaf, &cache->item,
1691                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
1692                                    sizeof(cache->item));
1693                 memcpy(&cache->key, &found_key, sizeof(found_key));
1694                 cache->cached = 0;
1695                 cache->pinned = 0;
1696                 key.objectid = found_key.objectid + found_key.offset;
1697                 btrfs_release_path(root, path);
1698
1699                 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) {
1700                         bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
1701                         cache->data = BTRFS_BLOCK_GROUP_MIXED;
1702                 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) {
1703                         bit = BLOCK_GROUP_DATA;
1704                         cache->data = BTRFS_BLOCK_GROUP_DATA;
1705                 } else {
1706                         bit = BLOCK_GROUP_METADATA;
1707                         cache->data = 0;
1708                 }
1709
1710                 /* use EXTENT_LOCKED to prevent merging */
1711                 set_extent_bits(block_group_cache, found_key.objectid,
1712                                 found_key.objectid + found_key.offset - 1,
1713                                 bit | EXTENT_LOCKED, GFP_NOFS);
1714                 set_state_private(block_group_cache, found_key.objectid,
1715                                   (unsigned long)cache);
1716
1717                 if (key.objectid >=
1718                     btrfs_super_total_bytes(&info->super_copy))
1719                         break;
1720         }
1721
1722         btrfs_free_path(path);
1723         return 0;
1724 }