Btrfs: fix page cache memory leak
[linux-2.6] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
16                                                  struct btrfs_block_group_cache
17                                                  *hint, int data)
18 {
19         struct btrfs_block_group_cache *cache[8];
20         struct btrfs_block_group_cache *found_group = NULL;
21         struct btrfs_fs_info *info = root->fs_info;
22         u64 used;
23         u64 last = 0;
24         u64 hint_last;
25         int i;
26         int ret;
27         int full_search = 0;
28         if (hint) {
29                 used = btrfs_block_group_used(&hint->item);
30                 if (used < (hint->key.offset * 2) / 3) {
31                         return hint;
32                 }
33                 radix_tree_tag_clear(&info->block_group_radix,
34                                      hint->key.objectid + hint->key.offset - 1,
35                                      BTRFS_BLOCK_GROUP_AVAIL);
36                 last = hint->key.objectid + hint->key.offset;
37                 hint_last = last;
38         } else {
39                 hint_last = 0;
40                 last = 0;
41         }
42         while(1) {
43                 ret = radix_tree_gang_lookup_tag(&info->block_group_radix,
44                                                  (void **)cache,
45                                                  last, ARRAY_SIZE(cache),
46                                                  BTRFS_BLOCK_GROUP_AVAIL);
47                 if (!ret)
48                         break;
49                 for (i = 0; i < ret; i++) {
50                         used = btrfs_block_group_used(&cache[i]->item);
51                         if (used < (cache[i]->key.offset * 2) / 3) {
52                                 info->block_group_cache = cache[i];
53                                 found_group = cache[i];
54                                 goto found;
55                         }
56                         radix_tree_tag_clear(&info->block_group_radix,
57                                            cache[i]->key.objectid +
58                                            cache[i]->key.offset - 1,
59                                            BTRFS_BLOCK_GROUP_AVAIL);
60                         last = cache[i]->key.objectid +
61                                 cache[i]->key.offset;
62                 }
63         }
64         last = hint_last;
65 again:
66         while(1) {
67                 ret = radix_tree_gang_lookup(&info->block_group_radix,
68                                                  (void **)cache,
69                                                  last, ARRAY_SIZE(cache));
70                 if (!ret)
71                         break;
72                 for (i = 0; i < ret; i++) {
73                         used = btrfs_block_group_used(&cache[i]->item);
74                         if (used < cache[i]->key.offset) {
75                                 info->block_group_cache = cache[i];
76                                 found_group = cache[i];
77                                 goto found;
78                         }
79                         radix_tree_tag_clear(&info->block_group_radix,
80                                            cache[i]->key.objectid +
81                                            cache[i]->key.offset - 1,
82                                            BTRFS_BLOCK_GROUP_AVAIL);
83                         last = cache[i]->key.objectid +
84                                 cache[i]->key.offset;
85                 }
86         }
87         info->block_group_cache = NULL;
88         if (!full_search) {
89                 last = 0;
90                 full_search = 1;
91                 goto again;
92         }
93 found:
94         if (!found_group) {
95                 ret = radix_tree_gang_lookup(&info->block_group_radix,
96                                              (void **)&found_group, 0, 1);
97                 BUG_ON(ret != 1);
98         }
99         return found_group;
100 }
101
102 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
103                                 struct btrfs_root *root,
104                                 u64 blocknr, u64 num_blocks)
105 {
106         struct btrfs_path *path;
107         int ret;
108         struct btrfs_key key;
109         struct btrfs_leaf *l;
110         struct btrfs_extent_item *item;
111         struct btrfs_key ins;
112         u32 refs;
113
114         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
115                          &ins);
116         path = btrfs_alloc_path();
117         BUG_ON(!path);
118         btrfs_init_path(path);
119         key.objectid = blocknr;
120         key.flags = 0;
121         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
122         key.offset = num_blocks;
123         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
124                                 0, 1);
125         if (ret != 0) {
126 printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
127                 BUG();
128         }
129         BUG_ON(ret != 0);
130         l = btrfs_buffer_leaf(path->nodes[0]);
131         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
132         refs = btrfs_extent_refs(item);
133         btrfs_set_extent_refs(item, refs + 1);
134         btrfs_mark_buffer_dirty(path->nodes[0]);
135
136         btrfs_release_path(root->fs_info->extent_root, path);
137         btrfs_free_path(path);
138         finish_current_insert(trans, root->fs_info->extent_root);
139         del_pending_extents(trans, root->fs_info->extent_root);
140         return 0;
141 }
142
143 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
144                              struct btrfs_root *root, u64 blocknr,
145                              u64 num_blocks, u32 *refs)
146 {
147         struct btrfs_path *path;
148         int ret;
149         struct btrfs_key key;
150         struct btrfs_leaf *l;
151         struct btrfs_extent_item *item;
152
153         path = btrfs_alloc_path();
154         btrfs_init_path(path);
155         key.objectid = blocknr;
156         key.offset = num_blocks;
157         key.flags = 0;
158         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
159         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
160                                 0, 0);
161         if (ret != 0)
162                 BUG();
163         l = btrfs_buffer_leaf(path->nodes[0]);
164         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
165         *refs = btrfs_extent_refs(item);
166         btrfs_release_path(root->fs_info->extent_root, path);
167         btrfs_free_path(path);
168         return 0;
169 }
170
171 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
172                        struct btrfs_root *root)
173 {
174         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
175 }
176
177 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
178                   struct buffer_head *buf)
179 {
180         u64 blocknr;
181         struct btrfs_node *buf_node;
182         struct btrfs_leaf *buf_leaf;
183         struct btrfs_disk_key *key;
184         struct btrfs_file_extent_item *fi;
185         int i;
186         int leaf;
187         int ret;
188
189         if (!root->ref_cows)
190                 return 0;
191         buf_node = btrfs_buffer_node(buf);
192         leaf = btrfs_is_leaf(buf_node);
193         buf_leaf = btrfs_buffer_leaf(buf);
194         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
195                 if (leaf) {
196                         key = &buf_leaf->items[i].key;
197                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
198                                 continue;
199                         fi = btrfs_item_ptr(buf_leaf, i,
200                                             struct btrfs_file_extent_item);
201                         if (btrfs_file_extent_type(fi) ==
202                             BTRFS_FILE_EXTENT_INLINE)
203                                 continue;
204                         ret = btrfs_inc_extent_ref(trans, root,
205                                     btrfs_file_extent_disk_blocknr(fi),
206                                     btrfs_file_extent_disk_num_blocks(fi));
207                         BUG_ON(ret);
208                 } else {
209                         blocknr = btrfs_node_blockptr(buf_node, i);
210                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
211                         BUG_ON(ret);
212                 }
213         }
214         return 0;
215 }
216
217 static int write_one_cache_group(struct btrfs_trans_handle *trans,
218                                  struct btrfs_root *root,
219                                  struct btrfs_path *path,
220                                  struct btrfs_block_group_cache *cache)
221 {
222         int ret;
223         int pending_ret;
224         struct btrfs_root *extent_root = root->fs_info->extent_root;
225         struct btrfs_block_group_item *bi;
226         struct btrfs_key ins;
227
228         find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins);
229         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
230         BUG_ON(ret);
231         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
232                             struct btrfs_block_group_item);
233         memcpy(bi, &cache->item, sizeof(*bi));
234         mark_buffer_dirty(path->nodes[0]);
235         btrfs_release_path(extent_root, path);
236
237         finish_current_insert(trans, extent_root);
238         pending_ret = del_pending_extents(trans, extent_root);
239         if (ret)
240                 return ret;
241         if (pending_ret)
242                 return pending_ret;
243         return 0;
244
245 }
246
247 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
248                                     struct btrfs_root *root)
249 {
250         struct btrfs_block_group_cache *cache[8];
251         int ret;
252         int err = 0;
253         int werr = 0;
254         struct radix_tree_root *radix = &root->fs_info->block_group_radix;
255         int i;
256         struct btrfs_path *path;
257
258         path = btrfs_alloc_path();
259         if (!path)
260                 return -ENOMEM;
261
262         while(1) {
263                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
264                                                  0, ARRAY_SIZE(cache),
265                                                  BTRFS_BLOCK_GROUP_DIRTY);
266                 if (!ret)
267                         break;
268                 for (i = 0; i < ret; i++) {
269                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
270                                              cache[i]->key.offset - 1,
271                                              BTRFS_BLOCK_GROUP_DIRTY);
272                         err = write_one_cache_group(trans, root,
273                                                     path, cache[i]);
274                         if (err)
275                                 werr = err;
276                         cache[i]->last_alloc = cache[i]->first_free;
277                 }
278         }
279         btrfs_free_path(path);
280         return werr;
281 }
282
283 static int update_block_group(struct btrfs_trans_handle *trans,
284                               struct btrfs_root *root,
285                               u64 blocknr, u64 num, int alloc)
286 {
287         struct btrfs_block_group_cache *cache;
288         struct btrfs_fs_info *info = root->fs_info;
289         u64 total = num;
290         u64 old_val;
291         u64 block_in_group;
292         int ret;
293         while(total) {
294                 ret = radix_tree_gang_lookup(&info->block_group_radix,
295                                              (void **)&cache, blocknr, 1);
296                 if (!ret) {
297                         printk(KERN_CRIT "blocknr %Lu lookup failed\n",
298                                blocknr);
299                         return -1;
300                 }
301                 block_in_group = blocknr - cache->key.objectid;
302                 WARN_ON(block_in_group > cache->key.offset);
303                 radix_tree_tag_set(&info->block_group_radix,
304                                    cache->key.objectid + cache->key.offset - 1,
305                                    BTRFS_BLOCK_GROUP_DIRTY);
306
307                 old_val = btrfs_block_group_used(&cache->item);
308                 num = min(total, cache->key.offset - block_in_group);
309                 total -= num;
310                 blocknr += num;
311                 if (alloc) {
312                         old_val += num;
313                         if (blocknr > cache->last_alloc)
314                                 cache->last_alloc = blocknr;
315                 } else {
316                         old_val -= num;
317                         if (blocknr < cache->first_free)
318                                 cache->first_free = blocknr;
319                 }
320                 btrfs_set_block_group_used(&cache->item, old_val);
321         }
322         return 0;
323 }
324
325 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
326                                btrfs_root *root)
327 {
328         unsigned long gang[8];
329         u64 first = 0;
330         int ret;
331         int i;
332         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
333
334         while(1) {
335                 ret = find_first_radix_bit(pinned_radix, gang,
336                                            ARRAY_SIZE(gang));
337                 if (!ret)
338                         break;
339                 if (!first)
340                         first = gang[0];
341                 for (i = 0; i < ret; i++) {
342                         clear_radix_bit(pinned_radix, gang[i]);
343                 }
344         }
345         return 0;
346 }
347
348 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
349                                  btrfs_root *extent_root)
350 {
351         struct btrfs_key ins;
352         struct btrfs_extent_item extent_item;
353         int i;
354         int ret;
355         u64 super_blocks_used;
356         struct btrfs_fs_info *info = extent_root->fs_info;
357
358         btrfs_set_extent_refs(&extent_item, 1);
359         ins.offset = 1;
360         ins.flags = 0;
361         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
362         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
363
364         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
365                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
366                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
367                 btrfs_set_super_blocks_used(info->disk_super,
368                                             super_blocks_used + 1);
369                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
370                                         sizeof(extent_item));
371                 BUG_ON(ret);
372         }
373         extent_root->fs_info->extent_tree_insert_nr = 0;
374         extent_root->fs_info->extent_tree_prealloc_nr = 0;
375         return 0;
376 }
377
378 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
379 {
380         int err;
381         struct btrfs_header *header;
382         struct buffer_head *bh;
383
384         if (!pending) {
385                 bh = btrfs_find_tree_block(root, blocknr);
386                 if (bh) {
387                         if (buffer_uptodate(bh)) {
388                                 u64 transid =
389                                     root->fs_info->running_transaction->transid;
390                                 header = btrfs_buffer_header(bh);
391                                 if (btrfs_header_generation(header) ==
392                                     transid) {
393                                         btrfs_block_release(root, bh);
394                                         return 0;
395                                 }
396                         }
397                         btrfs_block_release(root, bh);
398                 }
399                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
400         } else {
401                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
402         }
403         BUG_ON(err);
404         return 0;
405 }
406
407 /*
408  * remove an extent from the root, returns 0 on success
409  */
410 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
411                          *root, u64 blocknr, u64 num_blocks, int pin)
412 {
413         struct btrfs_path *path;
414         struct btrfs_key key;
415         struct btrfs_fs_info *info = root->fs_info;
416         struct btrfs_root *extent_root = info->extent_root;
417         int ret;
418         struct btrfs_extent_item *ei;
419         struct btrfs_key ins;
420         u32 refs;
421
422         key.objectid = blocknr;
423         key.flags = 0;
424         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
425         key.offset = num_blocks;
426
427         find_free_extent(trans, root, 0, 0, (u64)-1, &ins);
428         path = btrfs_alloc_path();
429         BUG_ON(!path);
430         btrfs_init_path(path);
431
432         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
433         if (ret) {
434                 printk("failed to find %Lu\n", key.objectid);
435                 btrfs_print_tree(extent_root, extent_root->node);
436                 printk("failed to find %Lu\n", key.objectid);
437                 BUG();
438         }
439         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
440                             struct btrfs_extent_item);
441         BUG_ON(ei->refs == 0);
442         refs = btrfs_extent_refs(ei) - 1;
443         btrfs_set_extent_refs(ei, refs);
444         btrfs_mark_buffer_dirty(path->nodes[0]);
445         if (refs == 0) {
446                 u64 super_blocks_used;
447
448                 if (pin) {
449                         ret = pin_down_block(root, blocknr, 0);
450                         BUG_ON(ret);
451                 }
452
453                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
454                 btrfs_set_super_blocks_used(info->disk_super,
455                                             super_blocks_used - num_blocks);
456                 ret = btrfs_del_item(trans, extent_root, path);
457                 if (ret)
458                         BUG();
459                 ret = update_block_group(trans, root, blocknr, num_blocks, 0);
460                 BUG_ON(ret);
461         }
462         btrfs_release_path(extent_root, path);
463         btrfs_free_path(path);
464         finish_current_insert(trans, extent_root);
465         return ret;
466 }
467
468 /*
469  * find all the blocks marked as pending in the radix tree and remove
470  * them from the extent map
471  */
472 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
473                                btrfs_root *extent_root)
474 {
475         int ret;
476         int wret;
477         int err = 0;
478         unsigned long gang[4];
479         int i;
480         struct radix_tree_root *pending_radix;
481         struct radix_tree_root *pinned_radix;
482
483         pending_radix = &extent_root->fs_info->pending_del_radix;
484         pinned_radix = &extent_root->fs_info->pinned_radix;
485
486         while(1) {
487                 ret = find_first_radix_bit(pending_radix, gang,
488                                            ARRAY_SIZE(gang));
489                 if (!ret)
490                         break;
491                 for (i = 0; i < ret; i++) {
492                         wret = set_radix_bit(pinned_radix, gang[i]);
493                         BUG_ON(wret);
494                         wret = clear_radix_bit(pending_radix, gang[i]);
495                         BUG_ON(wret);
496                         wret = __free_extent(trans, extent_root,
497                                              gang[i], 1, 0);
498                         if (wret)
499                                 err = wret;
500                 }
501         }
502         return err;
503 }
504
505 /*
506  * remove an extent from the root, returns 0 on success
507  */
508 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
509                       *root, u64 blocknr, u64 num_blocks, int pin)
510 {
511         struct btrfs_root *extent_root = root->fs_info->extent_root;
512         int pending_ret;
513         int ret;
514
515         if (root == extent_root) {
516                 pin_down_block(root, blocknr, 1);
517                 return 0;
518         }
519         ret = __free_extent(trans, root, blocknr, num_blocks, pin);
520         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
521         return ret ? ret : pending_ret;
522 }
523
524 /*
525  * walks the btree of allocated extents and find a hole of a given size.
526  * The key ins is changed to record the hole:
527  * ins->objectid == block start
528  * ins->flags = BTRFS_EXTENT_ITEM_KEY
529  * ins->offset == number of blocks
530  * Any available blocks before search_start are skipped.
531  */
532 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
533                             *orig_root, u64 num_blocks, u64 search_start, u64
534                             search_end, struct btrfs_key *ins)
535 {
536         struct btrfs_path *path;
537         struct btrfs_key key;
538         int ret;
539         u64 hole_size = 0;
540         int slot = 0;
541         u64 last_block = 0;
542         u64 test_block;
543         int start_found;
544         struct btrfs_leaf *l;
545         struct btrfs_root * root = orig_root->fs_info->extent_root;
546         struct btrfs_fs_info *info = root->fs_info;
547         int total_needed = num_blocks;
548         int total_found = 0;
549         int fill_prealloc = 0;
550         int level;
551         int update_block_group = 0;
552         struct btrfs_block_group_cache *hint_block_group;
553
554         path = btrfs_alloc_path();
555         ins->flags = 0;
556         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
557
558         level = btrfs_header_level(btrfs_buffer_header(root->node));
559         /* find search start here */
560         if (0 && search_start && num_blocks) {
561                 u64 used;
562                 ret = radix_tree_gang_lookup(&info->block_group_radix,
563                                              (void **)&hint_block_group,
564                                              search_start, 1);
565                 if (ret) {
566                         used = btrfs_block_group_used(&hint_block_group->item);
567                         if (used > (hint_block_group->key.offset * 9) / 10)
568                                 search_start = 0;
569                         else if (search_start < hint_block_group->last_alloc)
570                                 search_start = hint_block_group->last_alloc;
571                 } else {
572                         search_start = 0;
573                 }
574         }
575         if (num_blocks == 0) {
576                 fill_prealloc = 1;
577                 num_blocks = 1;
578                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
579         }
580         if (1 || !search_start) {
581                 trans->block_group = btrfs_find_block_group(root,
582                                                             trans->block_group,
583                                                             0);
584                 if (trans->block_group->last_alloc > search_start)
585                         search_start = trans->block_group->last_alloc;
586                 update_block_group = 1;
587         }
588 check_failed:
589         btrfs_init_path(path);
590         ins->objectid = search_start;
591         ins->offset = 0;
592         start_found = 0;
593         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
594         if (ret < 0)
595                 goto error;
596
597         if (path->slots[0] > 0)
598                 path->slots[0]--;
599
600         while (1) {
601                 l = btrfs_buffer_leaf(path->nodes[0]);
602                 slot = path->slots[0];
603                 if (slot >= btrfs_header_nritems(&l->header)) {
604                         if (fill_prealloc) {
605                                 info->extent_tree_prealloc_nr = 0;
606                                 total_found = 0;
607                         }
608                         ret = btrfs_next_leaf(root, path);
609                         if (ret == 0)
610                                 continue;
611                         if (ret < 0)
612                                 goto error;
613                         if (!start_found) {
614                                 ins->objectid = search_start;
615                                 ins->offset = (u64)-1 - search_start;
616                                 start_found = 1;
617                                 goto check_pending;
618                         }
619                         ins->objectid = last_block > search_start ?
620                                         last_block : search_start;
621                         ins->offset = (u64)-1 - ins->objectid;
622                         goto check_pending;
623                 }
624                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
625                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
626                         goto next;
627                 if (key.objectid >= search_start) {
628                         if (start_found) {
629                                 if (last_block < search_start)
630                                         last_block = search_start;
631                                 hole_size = key.objectid - last_block;
632                                 if (hole_size >= num_blocks) {
633                                         ins->objectid = last_block;
634                                         ins->offset = hole_size;
635                                         goto check_pending;
636                                 }
637                         }
638                 }
639                 start_found = 1;
640                 last_block = key.objectid + key.offset;
641 next:
642                 path->slots[0]++;
643         }
644         // FIXME -ENOSPC
645 check_pending:
646         /* we have to make sure we didn't find an extent that has already
647          * been allocated by the map tree or the original allocation
648          */
649         btrfs_release_path(root, path);
650         BUG_ON(ins->objectid < search_start);
651         if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) {
652                 if (search_start == 0)
653                         return -ENOSPC;
654                 search_start = 0;
655                 goto check_failed;
656         }
657         for (test_block = ins->objectid;
658              test_block < ins->objectid + num_blocks; test_block++) {
659                 if (test_radix_bit(&info->pinned_radix, test_block)) {
660                         search_start = test_block + 1;
661                         goto check_failed;
662                 }
663         }
664         if (!fill_prealloc && info->extent_tree_insert_nr) {
665                 u64 last =
666                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
667                 if (ins->objectid + num_blocks >
668                     info->extent_tree_insert[0] &&
669                     ins->objectid <= last) {
670                         search_start = last + 1;
671                         WARN_ON(1);
672                         goto check_failed;
673                 }
674         }
675         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
676                 u64 first =
677                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
678                 if (ins->objectid + num_blocks > first &&
679                     ins->objectid <= info->extent_tree_prealloc[0]) {
680                         search_start = info->extent_tree_prealloc[0] + 1;
681                         WARN_ON(1);
682                         goto check_failed;
683                 }
684         }
685         if (fill_prealloc) {
686                 int nr;
687                 test_block = ins->objectid;
688                 while(test_block < ins->objectid + ins->offset &&
689                       total_found < total_needed) {
690                         nr = total_needed - total_found - 1;
691                         BUG_ON(nr < 0);
692                         info->extent_tree_prealloc[nr] = test_block;
693                         total_found++;
694                         test_block++;
695                 }
696                 if (total_found < total_needed) {
697                         search_start = test_block;
698                         goto check_failed;
699                 }
700                 info->extent_tree_prealloc_nr = total_found;
701         }
702         if (update_block_group) {
703                 ret = radix_tree_gang_lookup(&info->block_group_radix,
704                                              (void **)&trans->block_group,
705                                              ins->objectid, 1);
706                 if (ret) {
707                         trans->block_group->last_alloc = ins->objectid;
708                 }
709         }
710         ins->offset = num_blocks;
711         btrfs_free_path(path);
712         return 0;
713 error:
714         btrfs_release_path(root, path);
715         btrfs_free_path(path);
716         return ret;
717 }
718 /*
719  * finds a free extent and does all the dirty work required for allocation
720  * returns the key for the extent through ins, and a tree buffer for
721  * the first block of the extent through buf.
722  *
723  * returns 0 if everything worked, non-zero otherwise.
724  */
725 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
726                        struct btrfs_root *root, u64 owner,
727                        u64 num_blocks, u64 search_start,
728                        u64 search_end, struct btrfs_key *ins)
729 {
730         int ret;
731         int pending_ret;
732         u64 super_blocks_used;
733         struct btrfs_fs_info *info = root->fs_info;
734         struct btrfs_root *extent_root = info->extent_root;
735         struct btrfs_extent_item extent_item;
736         struct btrfs_key prealloc_key;
737
738         btrfs_set_extent_refs(&extent_item, 1);
739         btrfs_set_extent_owner(&extent_item, owner);
740
741         if (root == extent_root) {
742                 int nr;
743                 BUG_ON(info->extent_tree_prealloc_nr == 0);
744                 BUG_ON(num_blocks != 1);
745                 ins->offset = 1;
746                 info->extent_tree_prealloc_nr--;
747                 nr = info->extent_tree_prealloc_nr;
748                 ins->objectid = info->extent_tree_prealloc[nr];
749                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
750                         ins->objectid;
751                 ret = update_block_group(trans, root,
752                                          ins->objectid, ins->offset, 1);
753                 BUG_ON(ret);
754                 return 0;
755         }
756         /* do the real allocation */
757         ret = find_free_extent(trans, root, num_blocks, search_start,
758                                search_end, ins);
759         if (ret)
760                 return ret;
761
762         /* then do prealloc for the extent tree */
763         ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset,
764                                search_end, &prealloc_key);
765         if (ret)
766                 return ret;
767
768         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
769         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
770                                     num_blocks);
771         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
772                                 sizeof(extent_item));
773
774         finish_current_insert(trans, extent_root);
775         pending_ret = del_pending_extents(trans, extent_root);
776         if (ret)
777                 return ret;
778         if (pending_ret)
779                 return pending_ret;
780         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
781         return 0;
782 }
783
784 /*
785  * helper function to allocate a block for a given tree
786  * returns the tree buffer or NULL.
787  */
788 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
789                                            struct btrfs_root *root, u64 hint)
790 {
791         struct btrfs_key ins;
792         int ret;
793         struct buffer_head *buf;
794
795         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
796                                  1, hint, (unsigned long)-1, &ins);
797         if (ret) {
798                 BUG();
799                 return NULL;
800         }
801         BUG_ON(ret);
802         buf = btrfs_find_create_tree_block(root, ins.objectid);
803         set_buffer_uptodate(buf);
804         set_buffer_checked(buf);
805         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
806         return buf;
807 }
808
809 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
810                          struct btrfs_root *root, struct buffer_head *cur)
811 {
812         struct btrfs_disk_key *key;
813         struct btrfs_leaf *leaf;
814         struct btrfs_file_extent_item *fi;
815         int i;
816         int nritems;
817         int ret;
818
819         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
820         leaf = btrfs_buffer_leaf(cur);
821         nritems = btrfs_header_nritems(&leaf->header);
822         for (i = 0; i < nritems; i++) {
823                 key = &leaf->items[i].key;
824                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
825                         continue;
826                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
827                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
828                         continue;
829                 /*
830                  * FIXME make sure to insert a trans record that
831                  * repeats the snapshot del on crash
832                  */
833                 ret = btrfs_free_extent(trans, root,
834                                         btrfs_file_extent_disk_blocknr(fi),
835                                         btrfs_file_extent_disk_num_blocks(fi),
836                                         0);
837                 BUG_ON(ret);
838         }
839         return 0;
840 }
841
842 /*
843  * helper function for drop_snapshot, this walks down the tree dropping ref
844  * counts as it goes.
845  */
846 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
847                           *root, struct btrfs_path *path, int *level)
848 {
849         struct buffer_head *next;
850         struct buffer_head *cur;
851         u64 blocknr;
852         int ret;
853         u32 refs;
854
855         WARN_ON(*level < 0);
856         WARN_ON(*level >= BTRFS_MAX_LEVEL);
857         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
858                                1, &refs);
859         BUG_ON(ret);
860         if (refs > 1)
861                 goto out;
862         /*
863          * walk down to the last node level and free all the leaves
864          */
865         while(*level >= 0) {
866                 WARN_ON(*level < 0);
867                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
868                 cur = path->nodes[*level];
869                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
870                         WARN_ON(1);
871                 if (path->slots[*level] >=
872                     btrfs_header_nritems(btrfs_buffer_header(cur)))
873                         break;
874                 if (*level == 0) {
875                         ret = drop_leaf_ref(trans, root, cur);
876                         BUG_ON(ret);
877                         break;
878                 }
879                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
880                                               path->slots[*level]);
881                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
882                 BUG_ON(ret);
883                 if (refs != 1) {
884                         path->slots[*level]++;
885                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
886                         BUG_ON(ret);
887                         continue;
888                 }
889                 next = read_tree_block(root, blocknr);
890                 WARN_ON(*level <= 0);
891                 if (path->nodes[*level-1])
892                         btrfs_block_release(root, path->nodes[*level-1]);
893                 path->nodes[*level-1] = next;
894                 *level = btrfs_header_level(btrfs_buffer_header(next));
895                 path->slots[*level] = 0;
896         }
897 out:
898         WARN_ON(*level < 0);
899         WARN_ON(*level >= BTRFS_MAX_LEVEL);
900         ret = btrfs_free_extent(trans, root,
901                                 bh_blocknr(path->nodes[*level]), 1, 1);
902         btrfs_block_release(root, path->nodes[*level]);
903         path->nodes[*level] = NULL;
904         *level += 1;
905         BUG_ON(ret);
906         return 0;
907 }
908
909 /*
910  * helper for dropping snapshots.  This walks back up the tree in the path
911  * to find the first node higher up where we haven't yet gone through
912  * all the slots
913  */
914 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
915                         *root, struct btrfs_path *path, int *level)
916 {
917         int i;
918         int slot;
919         int ret;
920         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
921                 slot = path->slots[i];
922                 if (slot < btrfs_header_nritems(
923                     btrfs_buffer_header(path->nodes[i])) - 1) {
924                         path->slots[i]++;
925                         *level = i;
926                         return 0;
927                 } else {
928                         ret = btrfs_free_extent(trans, root,
929                                                 bh_blocknr(path->nodes[*level]),
930                                                 1, 1);
931                         BUG_ON(ret);
932                         btrfs_block_release(root, path->nodes[*level]);
933                         path->nodes[*level] = NULL;
934                         *level = i + 1;
935                 }
936         }
937         return 1;
938 }
939
940 /*
941  * drop the reference count on the tree rooted at 'snap'.  This traverses
942  * the tree freeing any blocks that have a ref count of zero after being
943  * decremented.
944  */
945 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
946                         *root, struct buffer_head *snap)
947 {
948         int ret = 0;
949         int wret;
950         int level;
951         struct btrfs_path *path;
952         int i;
953         int orig_level;
954
955         path = btrfs_alloc_path();
956         BUG_ON(!path);
957         btrfs_init_path(path);
958
959         level = btrfs_header_level(btrfs_buffer_header(snap));
960         orig_level = level;
961         path->nodes[level] = snap;
962         path->slots[level] = 0;
963         while(1) {
964                 wret = walk_down_tree(trans, root, path, &level);
965                 if (wret > 0)
966                         break;
967                 if (wret < 0)
968                         ret = wret;
969
970                 wret = walk_up_tree(trans, root, path, &level);
971                 if (wret > 0)
972                         break;
973                 if (wret < 0)
974                         ret = wret;
975                 btrfs_btree_balance_dirty(root);
976         }
977         for (i = 0; i <= orig_level; i++) {
978                 if (path->nodes[i]) {
979                         btrfs_block_release(root, path->nodes[i]);
980                 }
981         }
982         btrfs_free_path(path);
983         return ret;
984 }
985
986 int btrfs_free_block_groups(struct btrfs_fs_info *info)
987 {
988         int ret;
989         struct btrfs_block_group_cache *cache[8];
990         int i;
991
992         while(1) {
993                 ret = radix_tree_gang_lookup(&info->block_group_radix,
994                                              (void **)cache, 0,
995                                              ARRAY_SIZE(cache));
996                 if (!ret)
997                         break;
998                 for (i = 0; i < ret; i++) {
999                         radix_tree_delete(&info->block_group_radix,
1000                                           cache[i]->key.objectid +
1001                                           cache[i]->key.offset - 1);
1002                         kfree(cache[i]);
1003                 }
1004         }
1005         return 0;
1006 }
1007
1008 int btrfs_read_block_groups(struct btrfs_root *root)
1009 {
1010         struct btrfs_path *path;
1011         int ret;
1012         int err = 0;
1013         struct btrfs_block_group_item *bi;
1014         struct btrfs_block_group_cache *cache;
1015         struct btrfs_key key;
1016         struct btrfs_key found_key;
1017         struct btrfs_leaf *leaf;
1018         u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1019         u64 used;
1020
1021         root = root->fs_info->extent_root;
1022         key.objectid = 0;
1023         key.offset = group_size_blocks;
1024         key.flags = 0;
1025         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1026
1027         path = btrfs_alloc_path();
1028         if (!path)
1029                 return -ENOMEM;
1030
1031         while(1) {
1032                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
1033                                         &key, path, 0, 0);
1034                 if (ret != 0) {
1035                         err = ret;
1036                         break;
1037                 }
1038                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1039                 btrfs_disk_key_to_cpu(&found_key,
1040                                       &leaf->items[path->slots[0]].key);
1041                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1042                 if (!cache) {
1043                         err = -1;
1044                         break;
1045                 }
1046                 bi = btrfs_item_ptr(leaf, path->slots[0],
1047                                     struct btrfs_block_group_item);
1048                 memcpy(&cache->item, bi, sizeof(*bi));
1049                 memcpy(&cache->key, &found_key, sizeof(found_key));
1050                 cache->last_alloc = cache->key.objectid;
1051                 cache->first_free = cache->key.objectid;
1052                 key.objectid = found_key.objectid + found_key.offset;
1053                 btrfs_release_path(root, path);
1054                 ret = radix_tree_insert(&root->fs_info->block_group_radix,
1055                                         found_key.objectid +
1056                                         found_key.offset - 1,
1057                                         (void *)cache);
1058                 BUG_ON(ret);
1059                 used = btrfs_block_group_used(bi);
1060                 if (used < (key.offset * 2) / 3) {
1061                         radix_tree_tag_set(&root->fs_info->block_group_radix,
1062                                            found_key.objectid +
1063                                            found_key.offset - 1,
1064                                            BTRFS_BLOCK_GROUP_AVAIL);
1065                 }
1066                 if (key.objectid >=
1067                     btrfs_super_total_blocks(root->fs_info->disk_super))
1068                         break;
1069         }
1070
1071         btrfs_free_path(path);
1072         return 0;
1073 }